Python create_batch Exemples, utils.create_batch Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : ARCDataset.py Projet : sidml/reptile-transformer

    def __init__(self, root, imgsz=15):
        """Build the train/validation batches from the ARC task files.

        Tasks are read from ``{root}/training`` and from those files in
        ``{root}/evaluation`` whose file name does not also appear in
        ``{root}/test``. Both sets are batched with ``create_batch`` and
        concatenated.

        Args:
            root: Directory holding the ``training``, ``evaluation`` and
                ``test`` folders of ``*.json`` task files.
            imgsz: Stored as both ``self.out_rows`` and ``self.out_cols`` and
                forwarded to ``create_batch``.
        """
        # Local import so this method does not rely on a module-level
        # ``import os`` being present in the file.
        import os

        super(ARCTrain, self).__init__()
        self.out_rows, self.out_cols = imgsz, imgsz

        # Batches from the training folder; merged into the instance
        # attributes further down.
        task_paths = f'{root}/training/*.json'
        train_x_batch, train_y_batch, val_x_batch,\
            val_y_batch = create_batch(
                glob(task_paths), self.out_rows, self.out_cols)

        # Compare by file name. os.path.basename handles whichever path
        # separator glob returns on the current platform (a plain
        # split('/') does not on Windows), and the set gives O(1) lookups.
        task_paths = glob(f'{root}/evaluation/*.json')
        test_task_ids = {
            os.path.basename(tp) for tp in glob(f'{root}/test/*.json')
        }
        task_paths = [
            tp for tp in task_paths
            if os.path.basename(tp) not in test_task_ids
        ]

        self.train_x_batch, self.train_y_batch, self.val_x_batch,\
            self.val_y_batch = create_batch(
                task_paths, self.out_rows, self.out_cols)

        # Append the training-folder batches after the evaluation-folder ones.
        for q1, q2, q3, q4 in zip(train_x_batch, train_y_batch, val_x_batch,
                                  val_y_batch):
            self.train_x_batch.append(q1)
            self.train_y_batch.append(q2)
            self.val_x_batch.append(q3)
            self.val_y_batch.append(q4)

        # Stack the validation data, add a singleton axis at position 1 and
        # move it to the GPU as float tensors; the targets are flattened.
        self.val_x_batch = np.vstack(np.array(self.val_x_batch))[:, None]
        self.val_y_batch = np.vstack(np.array(self.val_y_batch))[:, None]
        self.val_x_batch = torch.tensor(self.val_x_batch).float().cuda()
        self.val_y_batch = torch.tensor(self.val_y_batch).float().reshape(
            -1, ).cuda()

        print('Number of training tasks', len(self.train_x_batch))
        # NOTE(review): val_x_batch is a stacked tensor by now, so this is
        # the size of its first dimension - confirm it equals the task count.
        print('Number of validation tasks', len(self.val_x_batch))

Exemple #2

0

Afficher le fichier

Fichier : predict.py Projet : pg-git-97/masters-thesis

def main():
    """Translate every phrase of a split and save targets and predictions.

    Reads three command-line arguments from ``sys.argv``: the model number,
    the split file name and the number of classes. For each phrase a batch
    of one is built, translated and converted with ``convert_tar_pred``; the
    collected target and predicted lines are written to two text files.
    """
    print()
    model = int(sys.argv[1])
    file_name = sys.argv[2]
    n_classes = int(sys.argv[3])
    class_list = text_retrieve('class_list.txt')

    split_dir = 'data/sign-to-gloss/cleaned/split-files/'
    classes_str = str(n_classes)
    val_phrase = open_file(split_dir + file_name + '-phrase-' + classes_str)
    dataset_info = open_file(split_dir + 'dataset-info-' + classes_str)

    targets, predictions = [], []
    for idx in range(len(val_phrase)):
        print(idx)
        inp, tar = create_batch([val_phrase[idx]], dataset_info, n_classes)
        pred = translate(inp, model, n_classes)
        # The first and last elements of the target row are dropped before
        # conversion.
        tar, pred = convert_tar_pred(list(tar[0][1:-1]), pred, class_list)
        print('Target phrase: ', tar)
        print('Predict phrase: ', pred)
        print()
        targets.append(tar)
        predictions.append(pred)

    tar_text = lines_to_text(targets, '\n')
    pred_text = lines_to_text(predictions, '\n')

    out_prefix = (classes_str + '/luong/model_' + str(model) +
                  '/predictions/' + file_name)
    text_save(tar_text, out_prefix + '_tar.txt')
    text_save(pred_text, out_prefix + '_pred.txt')

Exemple #3

0

Afficher le fichier

Fichier : predict.py Projet : pg-git-97/masters-thesis

def main():
    model = 1
    file_name = 'train'
    n_classes = 100
    val_phrase = open_file('data/sign-to-gloss/cleaned/split-files/' +
                           file_name + '-phrase-' + str(n_classes))
    val_info = open_file('data/sign-to-gloss/cleaned/split-files/' +
                         file_name + '-info-' + str(n_classes))
    inp_lines, tar_lines, pred_lines = [], [], []
    for i in range(10, 11):
        inp, tar = create_batch([val_phrase[i]], val_info, n_classes)
        translate(inp, model)
        print(tar)
        """print('Input sentence: ', preprocess_inp_tar(inp))

Exemple #4

0

Afficher le fichier

Fichier : ARCDataset.py Projet : sidml/reptile-transformer

    def __init__(self, root, imgsz=30):
        """Build batches from the evaluation tasks that are also test tasks.

        Only files in ``{root}/evaluation`` whose file name also appears in
        ``{root}/test`` are kept and passed to ``create_batch``.

        Args:
            root: Directory holding the ``evaluation`` and ``test`` folders
                of ``*.json`` task files.
            imgsz: Stored as both ``self.out_rows`` and ``self.out_cols`` and
                forwarded to ``create_batch``.
        """
        # Local import so this method does not rely on a module-level
        # ``import os`` being present in the file.
        import os

        super(ARCVal, self).__init__()
        self.out_rows, self.out_cols = imgsz, imgsz

        # Compare by file name. os.path.basename handles whichever path
        # separator glob returns on the current platform (a plain
        # split('/') does not on Windows), and the set gives O(1) lookups.
        task_paths = glob(f'{root}/evaluation/*.json')
        test_task_ids = {
            os.path.basename(tp) for tp in glob(f'{root}/test/*.json')
        }
        task_paths = [
            tp for tp in task_paths if os.path.basename(tp) in test_task_ids
        ]
        self.train_x_batch, self.train_y_batch, self.val_x_batch,\
            self.val_y_batch = create_batch(task_paths, self.out_rows,
                                            self.out_cols)

        print('Number of training tasks', len(self.train_x_batch))
        print('Number of validation tasks', len(self.val_x_batch))

Exemple #5

0

Afficher le fichier

Fichier : create.py Projet : rdugue/Shifty-API

def handler(event, context):
    """Create one user, or a batch of users, from the request body.

    The JSON body is either a single user object or a list of them. Each
    password is replaced by ``bcrypt.hash(password)`` before the record is
    handed to ``create_user`` / ``create_batch``.

    Args:
        event: Request mapping; its ``'body'`` entry holds the JSON text.
        context: Not used by this function.

    Returns:
        ``respond(response)`` when the create call returned a mapping
        containing ``'error'``, ``respond(None, {'data': payload})`` on
        success, or ``respond({'error': 'No request body'})`` when the body
        is missing or empty.
    """
    # NOTE(review): this logs the whole event, including any plaintext
    # passwords in the body - consider redacting.
    print("Received api request: " + json.dumps(event, indent=2))

    # .get() so an event with no 'body' key reaches the error response
    # instead of raising KeyError.
    body = event.get('body')
    if not body:
        return respond({'error': 'No request body'})

    payload = json.loads(body)
    if isinstance(payload, list):
        users = []
        for user in payload:
            user['password'] = bcrypt.hash(user['password'])
            users.append(user)
        response = create_batch(users, 'User')
    else:
        payload['password'] = bcrypt.hash(payload['password'])
        response = create_user(payload)

    if 'error' in response:
        return respond(response)
    # NOTE(review): payload was modified in place above, so the password
    # hashes are echoed back to the caller - confirm this is intended.
    return respond(None, {'data': payload})

Exemple #6

0

Afficher le fichier

def handler(event, context):
    """Create one shift, or a batch of shifts, from the request body.

    The JSON body is either a single shift object or a list of them. Every
    shift gets a fresh ``uuid4`` string as ``'id'`` and ``'tradeable'`` set
    to ``False`` before it is handed to ``create`` / ``create_batch``.

    Args:
        event: Request mapping; its ``'body'`` entry holds the JSON text.
        context: Not used by this function.

    Returns:
        ``respond(response)`` when the create call returned a mapping
        containing ``'error'``, ``respond(None, {'data': payload})`` on
        success, or ``respond({'error': 'No request body'})`` when the body
        is missing or empty.
    """
    print("Received api request: " + json.dumps(event, indent=2))

    # .get() so an event with no 'body' key reaches the error response
    # instead of raising KeyError.
    body = event.get('body')
    if not body:
        return respond({'error': 'No request body'})

    payload = json.loads(body)
    if isinstance(payload, list):
        shifts = []
        for shift in payload:
            shift['id'] = str(uuid4())
            shift['tradeable'] = False
            shifts.append(shift)
        response = create_batch(shifts, 'Shifts')
    else:
        payload['id'] = str(uuid4())
        payload['tradeable'] = False
        response = create(payload)

    if 'error' in response:
        return respond(response)
    # payload was modified in place, so the response carries the new ids.
    return respond(None, {'data': payload})

Exemple #7

0

Afficher le fichier

Fichier : train.py Projet : pg-git-97/masters-thesis

def main():
    """Build one small training batch and run it through ``DecoderPreNet``.

    Loads the file list, shuffles it, splits it into train/validation/test
    slices, batches the first eight training files with ``create_batch`` and
    prints the shapes of the target batch and of the pre-net output.
    """
    print()
    files = text_retrieve('files_list.txt')
    print('No. of files in original dataset: ', len(files))
    print()
    shuffle(files)
    # Disjoint slices: validation and test previously reused the first 20
    # training files, so all three splits overlapped.
    train, val, test = files[:1000], files[1000:1020], files[1020:1040]
    print('No. of files in training dataset: ', len(train))
    print('No. of files in validation dataset: ', len(val))
    print('No. of files in testing dataset: ', len(test))
    print()
    inp_word_index = open_file(
        'results/grapheme-to-phoneme/luong/model_7/utils/tar-word-index.pkl')
    start_index = 0
    batch_size = 8
    train = train[start_index:start_index + batch_size]
    train_batch_inp, train_batch_tar = create_batch(train, inp_word_index)
    dec_pre_net = DecoderPreNet(256, 0.1)
    print(train_batch_tar.shape)
    print(train_batch_tar[:, :, 0].shape)
    x = dec_pre_net(train_batch_tar[:, 0], False)
    print(x.shape)

Exemple #8

0

Afficher le fichier

    def train(self, x, y, batch_size, is_shuffle, epoch, x_val, y_val, x_test,
              y_test):
        """Run the training loop and report quality on every data split.

        Quality is evaluated with ``self._eval`` before training, after each
        epoch (on the training and validation data) and once on the test
        data at the end. After every epoch ``self.learning_rate`` is divided
        by ``self.learning_rate_decay``.

        Args:
            x: Training inputs.
            y: Training label sequences (indices looked up in
                ``self.idx2tag``).
            batch_size: Forwarded to ``create_batch``.
            is_shuffle: Forwarded to ``create_batch``.
            epoch: Number of epochs to run.
            x_val: Validation inputs.
            y_val: Validation labels, passed to ``self._eval`` unchanged.
            x_test: Test inputs.
            y_test: Test labels, passed to ``self._eval`` unchanged.
        """
        with tf.Session() as sess:
            # sess = tf_debug.LocalCLIDebugWrapperSession(sess)
            init_op = tf.initializers.global_variables()
            sess.run(init_op)

            # Flatten the training label sequences into one list of tags.
            y_tags = [
                self.idx2tag[y_index] for y_sequence in y
                for y_index in y_sequence
            ]
            # The headings now name the data actually evaluated: (x, y_tags)
            # is the training set and (x_val, y_val) the validation set. The
            # two headings were previously swapped.
            print('-' * 5 + 'Train set quality' + '-' * 5)
            self._eval(sess, x, y_tags)
            print('-' * 5 + 'Validation set quality' + '-' * 5)
            self._eval(sess, x_val, y_val)
            for i in range(epoch):
                print('start to epoch: %i' % (i + 1))
                for x_batch, y_batch, lens in create_batch(
                        x, y, batch_size, is_shuffle, self.pad_token_index,
                        self.pad_tag_index):
                    feed_dict = {
                        self.input_batch_ph: x_batch,
                        self.label_batch_ph: y_batch,
                        self.sequence_length_ph: lens,
                        self.dropout_ph: self.keep_drop,
                        self.learning_rate_ph: self.learning_rate
                    }
                    sess.run(self.train_op, feed_dict=feed_dict)
                self.learning_rate = self.learning_rate / self.learning_rate_decay
                print('-' * 5 + 'Train set quality' + '-' * 5)
                self._eval(sess, x, y_tags)
                print('-' * 5 + 'Validation set quality' + '-' * 5)
                self._eval(sess, x_val, y_val)

            print('-' * 5 + 'Test set quality' + '-' * 5)
            self._eval(sess, x_test, y_test)

Exemple #9

0

Afficher le fichier

def train(config, params, train_dataset, test_dataset, add_training_log):
    """Train the configured model, saving it whenever the test score improves.

    Args:
        config: Mapping read for ``'save_params_path'``, ``'model'``,
            ``'metric'``, ``'save_model_path'`` and ``'saved_path'``.
        params: Mapping passed as keyword arguments to ``config['model']``;
            also read for ``'batch_size'``, ``'nepochs'`` and ``'freq_eval'``.
        train_dataset: Data batched with ``create_batch`` for training.
        test_dataset: Data batched with ``create_batch`` for evaluation.
        add_training_log: Not used by this function.

    Returns:
        Tuple ``(best_test, config['saved_path'])`` where ``best_test`` is
        the highest mean test score seen.
    """
    #
    # build batches
    #
    train_batch = create_batch(train_dataset, params['batch_size'])
    test_batch = create_batch(test_dataset, params['batch_size'])

    #
    # save params
    #
    # Binary mode plus a context manager: pickles are binary data and the
    # file handle was previously never closed.
    with open(config['save_params_path'], 'wb') as params_file:
        cPickle.dump(params, params_file)

    #
    # build tensorflow model
    #
    @timeit
    def build_tf_model():
        print('-- build model')
        model = config['model'](**params)
        model.build_model()

        # build custom tensorboard
        print('-- build tensorboard')
        # tensorboard = CustomTensorboard(dir_summary=config['save_summary_path'], model_graph=model.sess.graph,
        #                                 metric=config['metric'])
        tensorboard = None

        return model, tensorboard

    model, tensorboard = build_tf_model()

    #
    # training parameters
    #
    print('-- start training')
    train_count = 0
    eval_count = 0
    best_test = 0.0
    decay_lr_every = 500
    lr_decay = 0.9
    init_lr = model.lr

    #
    # training
    #
    for epoch in range(params['nepochs']):
        print("\n[E]poch %i, lr %.4f" % (epoch + 1, init_lr))
        bar = Bar('training', max=len(train_batch), suffix='')
        tabl = PrettyTable([
            'status', 'train_epoch', 'train_batch', 'test_avg_score',
            'test_avg_loss'
        ])

        loss_total, acc_total = [], []

        # Visit the batches in a fresh random order every epoch. The loop
        # previously indexed with the enumerate counter, which ignored the
        # permutation and always walked the batches in their stored order.
        for batch_id in np.random.permutation(len(train_batch)):
            train_count += 1
            batch = train_batch[batch_id]

            # NOTE(review): init_lr is decayed below but only forwarded to
            # the eval calls; this train call passes no lr= - confirm how
            # batch_run picks up the decayed rate.
            loss, acc = model.batch_run(batch=batch,
                                        i=train_count,
                                        mode='train',
                                        metric=config['metric'])

            loss_total.append(loss)
            acc_total.append(acc)

            bar.bar_prefix = " | batch %i |" % (batch_id + 1)
            bar.bar_suffix = " | cur_loss: %.3f | cur_acc: %.3f | best_test_acc: %.3f" % (
                loss, acc, best_test)

            bar.next()

            if train_count % decay_lr_every == 0:
                init_lr = init_lr * lr_decay

            if train_count % params['freq_eval'] == 0:
                test_loss_lst, test_acc_lst = [], []

                for t_batch_id in range(len(test_batch)):
                    t_batch = test_batch[t_batch_id]
                    eval_count += 1

                    loss, acc = model.batch_run(batch=t_batch,
                                                i=eval_count,
                                                mode='eval',
                                                metric=config['metric'],
                                                lr=init_lr)

                    test_loss_lst.append(loss)
                    test_acc_lst.append(acc)

                test_loss = np.array(test_loss_lst).mean()
                test_acc = np.array(test_acc_lst).mean()

                status = '++++'
                if test_acc > best_test:
                    best_test = test_acc
                    status = 'best'

                    model.save(config['save_model_path'])

                tabl.add_row([
                    status, epoch + 1, batch_id + 1,
                    '%.3f' % test_acc,
                    '%.3f' % test_loss
                ])

        # Single-argument print(...) works as both a Python 2 statement and
        # a Python 3 function call, matching the other prints in this block.
        print("\nmean_train_loss: %.3f, mean_train_acc: %.3f" % (
            np.mean(loss_total), np.mean(acc_total)))
        print(tabl.get_string(title="Local All Test Accuracies"))

    return best_test, config['saved_path']

Exemple #10

0

Afficher le fichier

    def train_next(self,
                   train_dataset,
                   test_dataset,
                   batch_size,
                   out_path,
                   params=None,
                   **kargs):
        """Continue training ``self.model``, saving it on each new best score.

        Args:
            train_dataset: Data batched with ``create_batch`` for training.
            test_dataset: Data batched with ``create_batch`` for evaluation.
            batch_size: Batch size forwarded to ``create_batch``.
            out_path: Output directory; when it differs from
                ``self.save_path`` the existing materials are copied there
                first. The model is saved to ``model.ckpt`` inside it.
            params: Optional mapping read for ``'n_epoch'`` (default 10),
                ``'freq_eval'`` (default 20) and ``'metric'`` (default
                ``'f1'``).
            **kargs: Accepted but not used by this method.
        """
        # None instead of a shared mutable {} default.
        if params is None:
            params = {}

        # copy existing materials to new path
        if out_path != self.save_path:
            train.copy(self.save_path, out_path)

        save_model_path = os.path.join(out_path, 'model.ckpt')

        # setting some parameters
        n_epoch = params.get('n_epoch', 10)
        freq_eval = params.get('freq_eval', 20)
        metric = params.get('metric', 'f1')

        #
        # build batches
        #
        train_batch = create_batch(train_dataset, batch_size)
        test_batch = create_batch(test_dataset, batch_size)

        print('-- start training')
        train_count = 0
        eval_count = 0
        best_test = -np.inf
        decay_lr_every = 500
        lr_decay = 0.9
        init_lr = self.model.lr

        #
        # training
        #
        for epoch in range(n_epoch):
            print("\n[E]poch %i, lr %.4f" % (epoch + 1, init_lr))
            bar = Bar('training', max=len(train_batch), suffix='')
            tabl = PrettyTable([
                'status', 'train_epoch', 'train_batch', 'test_avg_score',
                'test_avg_loss'
            ])

            loss_total, acc_total = [], []

            # Visit the batches in a fresh random order every epoch. The
            # loop previously indexed with the enumerate counter, which
            # ignored the permutation and walked the batches in stored order.
            for batch_id in np.random.permutation(len(train_batch)):
                train_count += 1
                batch = train_batch[batch_id]

                # NOTE(review): init_lr is decayed below but only forwarded
                # to the eval calls; this train call passes no lr= - confirm
                # how batch_run picks up the decayed rate.
                loss, acc = self.model.batch_run(batch=batch,
                                                 i=train_count,
                                                 mode='train',
                                                 metric=metric)

                loss_total.append(loss)
                acc_total.append(acc)

                bar.bar_prefix = " | batch %i |" % (batch_id + 1)
                bar.bar_suffix = " | cur_loss: %.3f | cur_acc: %.3f | best_test_acc: %.3f" % (
                    loss, acc, best_test)

                bar.next()

                if train_count % decay_lr_every == 0:
                    init_lr = init_lr * lr_decay

                if train_count % freq_eval == 0:
                    test_loss_lst, test_acc_lst = [], []

                    for t_batch_id in range(len(test_batch)):
                        t_batch = test_batch[t_batch_id]
                        eval_count += 1

                        loss, acc = self.model.batch_run(batch=t_batch,
                                                         i=eval_count,
                                                         mode='eval',
                                                         metric=metric,
                                                         lr=init_lr)

                        test_loss_lst.append(loss)
                        test_acc_lst.append(acc)

                    test_loss = np.array(test_loss_lst).mean()
                    test_acc = np.array(test_acc_lst).mean()

                    status = '++++'
                    if test_acc > best_test:
                        best_test = test_acc
                        status = 'best'

                        self.model.save(save_model_path)

                    tabl.add_row([
                        status, epoch + 1, batch_id + 1,
                        '%.3f' % test_acc,
                        '%.3f' % test_loss
                    ])

            # Single-argument print(...) works as both a Python 2 statement
            # and a Python 3 function call, matching the other prints here.
            print("\nmean_train_loss: %.3f, mean_train_acc: %.3f" % (
                np.mean(loss_total), np.mean(acc_total)))
            print(tabl.get_string(title="Local All Test Accuracies"))

Exemple #11

0

Afficher le fichier

Fichier : test.py Projet : seu-kse/MLPQ

def main(_):
    """Evaluate a stored M_IRN model on the held-out test split.

    The two language labels are parsed from ``FLAGS.dataset``, both knowledge
    bases and the alignment files are loaded, and 10% of the processed data
    is held out as the test set (fixed random state). Depending on
    ``FLAGS.direct_align`` the model is evaluated once, or once per top-k
    alignment candidate (or per pair of candidates).

    Args:
        _: Unused positional argument.
    """
    FLAGS.checkpoint_dir = os.path.join(FLAGS.checkpoint_dir, FLAGS.dataset)
    if not os.path.exists(FLAGS.checkpoint_dir):
        os.makedirs(FLAGS.checkpoint_dir)

    d_labels = FLAGS.dataset.split("_")
    lan_1 = d_labels[1]
    kb1_file = '{}/{}/{}_KB.txt'.format(FLAGS.data_dir, FLAGS.kb_dir, lan_1)
    # Second language: first later label that differs from lan_1 ("" if none).
    lan_2 = ""
    for label in d_labels[1:]:
        if label != lan_1:
            lan_2 = label
            break
    kb2_file = '{}/{}/{}_KB.txt'.format(FLAGS.data_dir, FLAGS.kb_dir, lan_2)
    data_file = '{}/{}.txt'.format(FLAGS.data_dir, FLAGS.dataset)

    # Renamed from `dir` so the builtin is not shadowed.
    lang_pair = lan_1 + "_" + lan_2
    is_flip = lang_pair in ["zh_fr", "en_fr", "zh_en"]

    # For flipped pairs the gold alignment file is named with lan_2 first.
    gold_first, gold_second = (lan_2, lan_1) if is_flip else (lan_1, lan_2)
    align_gold_file = '{}/{}/{}_{}_correct.txt'.format(FLAGS.data_dir, FLAGS.kb_dir, gold_first, gold_second)
    align_pred_file_1_2 = '{}/{}/{}_{}_predictions_hits@{}.txt'.format(FLAGS.data_dir, FLAGS.kb_dir, lan_1, lan_2,
                                                                      FLAGS.top_k)
    align_pred_file_2_1 = '{}/{}/{}_{}_predictions_hits@{}.txt'.format(FLAGS.data_dir, FLAGS.kb_dir, lan_2, lan_1,
                                                                      FLAGS.top_k)

    start = time.time()
    print("Loading data...")

    # build and store knowledge bases
    kb1 = KnowledgeBase(kb1_file, name="kb1")
    kb2 = KnowledgeBase(kb2_file, name="kb2")
    multi_kb = MultiKnowledgeBase(kb1, kb2, align_gold_file, is_flip)
    # load predictions from alignment model
    multi_kb.load_pred(align_pred_file_1_2, align_pred_file_2_1)

    q_ids, p_ids, q_strs, p_strs, qw2id, FLAGS.sentence_size, FLAGS.question_words, FLAGS.hops, \
    FLAGS.lan_que, FLAGS.lan_labels, FLAGS.steps = process_dataset(data_file, multi_kb)

    print("Data loading cost {} seconds".format(time.time() - start))

    train_q, test_q, train_p, test_p = train_test_split(q_ids, p_ids, test_size=.1, random_state=123)

    n_testing = test_q.shape[0]
    print("Testing Size", n_testing)

    t_batches = create_batch(n_testing, FLAGS.batch_size)

    if FLAGS.direct_align:
        with tf.Session() as sess:
            model = M_IRN(FLAGS, multi_kb, sess)
            model.load()
            t_preds = model.predict(test_q, test_p, t_batches)
            t_accu, t_al, t_strict = multi_accuracy(test_p, t_preds, multi_kb, FLAGS.steps, FLAGS.hops, FLAGS.lan_labels)

            print('-----------------------')
            print('Test is under direct align mode.')
            print('Test Data', data_file)
            print('Test Accuracy:', t_accu, t_al)
            print("Strict test accuracy: ", t_strict)
            if FLAGS.save:
                # splitext removes only the ".txt" extension. The previous
                # str.strip(".txt") stripped any of the characters '.', 't',
                # 'x' from BOTH ends of the path, mangling e.g. "./data/...".
                recov_name = os.path.splitext(data_file)[0] + "_predictions.txt"
                print("prediction results saved in {}.".format(recov_name))
                recover_predictions(recov_name, t_preds, multi_kb, FLAGS.hops, FLAGS.lan_labels, FLAGS.steps)
            print('-----------------------')

    else:
        if FLAGS.steps[-1] < 8 or FLAGS.top_k == 1:
            # One alignment: the same candidate rank k is used in both
            # directions.
            t_preds_k = []
            for k in range(FLAGS.top_k):
                FLAGS.this_k_1_2 = FLAGS.this_k_2_1 = k
                print('Building model for top {}...'.format(k + 1))
                with tf.Session() as sess:
                    model = M_IRN(FLAGS, multi_kb, sess)
                    model.load()
                    print('Predicting top {}...'.format(k+1))
                    t_preds_k.append(model.predict(test_q, test_p, t_batches))
                # Fresh graph for the next model build.
                tf.reset_default_graph()
            t_preds_k = np.array(t_preds_k)
            path_accu, strict_accu = k_accuracy(test_p, t_preds_k)
            print('-----------------------')
            print('Test is under align prediction with hits@{} mode(with only one alignment).'.format(FLAGS.top_k))
            print('Test Data', data_file)
            print('Test Path Accuracy:', path_accu)
            print("Strict test accuracy: ", strict_accu)
            print('-----------------------')

        else:
            # Two alignments: every (k, j) pair of candidate ranks is tried.
            t_preds_k = []
            for k in range(FLAGS.top_k):
                for j in range(FLAGS.top_k):
                    FLAGS.this_k_1_2 = k
                    FLAGS.this_k_2_1 = j
                    print('Building model for top {} + top {}...'.format(k + 1, j + 1))
                    with tf.Session() as sess:
                        model = M_IRN(FLAGS, multi_kb, sess)
                        model.load()
                        print('Predicting top {} + top {}...'.format(k + 1, j + 1))
                        t_preds_k.append(model.predict(test_q, test_p, t_batches))
                    # Fresh graph for the next model build.
                    tf.reset_default_graph()
            t_preds_k = np.array(t_preds_k)
            path_accu, strict_accu = k2_accuracy(test_p, t_preds_k, multi_kb)
            print('-----------------------')
            print('Test is under align prediction with hits@{} mode(with two alignments).'.format(FLAGS.top_k))
            print('Test Data', data_file)
            print('Test Path Accuracy:', path_accu)
            print("Strict test accuracy: ", strict_accu)
            print('-----------------------')

Exemple #12

0

Afficher le fichier

Fichier : fixtures.py Projet : shresthichauhan/sawtooth-core

def setup_batch(request):
    """Post one intkey batch and yield the ids needed to verify it.

    Creates a single intkey ``set`` transaction, wraps it in a batch (one
    transaction per batch), posts the batch and then reads the block list
    back.

    Args:
        request: Not used by this function.

    Yields:
        dict with the keys ``expected_trxn_ids``, ``expected_batch_ids``,
        ``signer_key``, ``block_list``, ``block_ids``, ``batch_ids`` and
        ``expected_head_id``.
    """

    def header_signature(message):
        # Convert the protobuf message to a dict and read its signature.
        as_dict = MessageToDict(message,
                                including_default_value_fields=True,
                                preserving_proto_field_name=True)
        return as_dict['header_signature']

    # The result dict is only ever added to. Previously the same name was
    # re-bound to each MessageToDict(...) result and to the HTTP error body,
    # which silently discarded 'expected_trxn_ids' (and, on error, every key
    # set before the failure).
    data = {}
    signer = get_signer()
    # Value is not used below; the call is kept as in the original.
    initial_state_length = len(get_state_list())

    LOGGER.info("Creating intkey transactions with set operations")

    txns = [
        create_intkey_transaction("set", 'a', 0, [], signer),
    ]

    data['expected_trxn_ids'] = [header_signature(txn) for txn in txns]

    LOGGER.info("Creating batches for transactions 1trn/batch")

    batches = [create_batch([txn], signer) for txn in txns]

    data['expected_batch_ids'] = [header_signature(batch) for batch in batches]
    data['signer_key'] = signer.get_public_key().as_hex()

    post_batch_list = [
        BatchList(batches=[batch]).SerializeToString() for batch in batches
    ]

    LOGGER.info("Submitting batches to the handlers")

    for batch in post_batch_list:
        try:
            post_batch(batch)
        except urllib.error.HTTPError as error:
            # Log the error title and message, then carry on with the
            # remaining batches.
            LOGGER.info("Rest Api is not reachable")
            error_body = json.loads(error.fp.read().decode('utf-8'))
            LOGGER.info(error_body['error']['title'])
            LOGGER.info(error_body['error']['message'])

    block_list = get_blocks()
    data['block_list'] = block_list
    block_ids = [block['header_signature'] for block in block_list]
    data['block_ids'] = block_ids
    # First batch id of every block.
    batch_ids = [block['header']['batch_ids'][0] for block in block_list]
    data['batch_ids'] = batch_ids
    # The first entry of the block list is taken as the expected head.
    expected_head_id = block_ids[0]
    data['expected_head_id'] = expected_head_id
    yield data

Exemple #13

0

Afficher le fichier

Fichier : main.py Projet : jstevens-caps/awd-lstm

    num_batch = 0
    num_words = 0 
    for e in range(1, args.epochs + 1):
        model.train()
        model.reset_hidden()
        train_loss = 0
        train_KL = 0 
        with tqdm(total=len(train_data)) as t:
            for batch in train_data:
                #print("len sentences", sentences)
                if args.tokenized == 1:
                  sentences = batch[0]
                  labels = batch[1]
                  x, y, tags = create_batch(sentences, 
                                                      labels, 
                                                      corpus.vocabulary, 
                                                      tag_ids, 
                                                      device)
                else:
                  x, y = batch
                num_words += x.size(0) 
                num_batch_words = x.size(0)

                # Scale learning rate to sequence length
                # if args.use_var_bptt and not args.no_lr_scaling:
                #     seq_len, _ = x.shape
                #     optimizer.param_groups[0]['lr'] = args.lr * seq_len / args.bptt

                # # # Adjust discriminative learning rates
                # for i in range(len(optimizer.param_groups)):
                #     optimizer.param_groups[i]['lr'] /= args.disc_rate ** i

Exemple #14

0

Afficher le fichier

Fichier : train.py Projet : seu-kse/MLPQ

def main(_):
	"""Train an M_IRN model with validation-based model selection.

	The two language labels are parsed from ``FLAGS.dataset``, both knowledge
	bases are loaded and the processed data is split into train, validation
	and test sets (fixed random states). Each epoch trains the two
	knowledge-graph embeddings and the reasoning model, then reports the
	accuracy on all three splits; the model is stored whenever the
	validation score improves.

	Args:
		_: Unused positional argument.
	"""
	FLAGS.checkpoint_dir = os.path.join(FLAGS.checkpoint_dir, FLAGS.dataset)
	if not os.path.exists(FLAGS.checkpoint_dir):
		os.makedirs(FLAGS.checkpoint_dir)

	d_labels = FLAGS.dataset.split("_")
	lan_1 = d_labels[1]
	kb1_file = '{}/{}/{}_KB.txt'.format(FLAGS.data_dir, FLAGS.kb_dir, lan_1)
	# Second language: first later label that differs from lan_1 ("" if none).
	lan_2 = ""
	for label in d_labels[1:]:
		if label != lan_1:
			lan_2 = label
			break
	kb2_file = '{}/{}/{}_KB.txt'.format(FLAGS.data_dir, FLAGS.kb_dir, lan_2)
	data_file = '{}/{}.txt'.format(FLAGS.data_dir, FLAGS.dataset)

	# Renamed from `dir` so the builtin is not shadowed.
	lang_pair = lan_1 + "_" + lan_2
	is_flip = lang_pair in ["zh_fr", "en_fr", "zh_en"]

	# For flipped pairs the gold alignment file is named with lan_2 first.
	gold_first, gold_second = (lan_2, lan_1) if is_flip else (lan_1, lan_2)
	align_gold_file = '{}/{}/{}_{}_correct.txt'.format(FLAGS.data_dir, FLAGS.kb_dir, gold_first, gold_second)

	start = time.time()
	print("Loading data...")

	# build and store knowledge bases
	kb1 = KnowledgeBase(kb1_file, name="kb1")
	kb2 = KnowledgeBase(kb2_file, name="kb2")
	multi_kb = MultiKnowledgeBase(kb1, kb2, align_gold_file, is_flip)

	q_ids, p_ids, q_strs, p_strs, qw2id, FLAGS.sentence_size, FLAGS.question_words, FLAGS.hops, \
	FLAGS.lan_que, FLAGS.lan_labels, FLAGS.steps = process_dataset(data_file, multi_kb)

	print("Data loading cost {} seconds".format(time.time() - start))

	train_q, test_q, train_p, test_p = train_test_split(q_ids, p_ids, test_size=.1, random_state=123)
	train_q, valid_q, train_p, valid_p = train_test_split(train_q, train_p, test_size=.11, random_state=0)

	n_training = train_q.shape[0]
	n_testing = test_q.shape[0]
	n_validation = valid_q.shape[0]

	print("Training Size", n_training)
	print("Validation Size", n_validation)
	print("Testing Size", n_testing)

	# batch_id
	# batches = [(start, end) for start, end in batches] abandon last few examples
	tr_batches = create_batch(n_training, FLAGS.batch_size)
	# Separate copy for prediction: tr_batches is shuffled in place each epoch.
	tr_batches_test = create_batch(n_training, FLAGS.batch_size)
	v_batches = create_batch(n_validation, FLAGS.batch_size)
	t_batches = create_batch(n_testing, FLAGS.batch_size)

	kb1_triples = multi_kb.kb1.triples
	kb2_triples = multi_kb.kb2.triples

	with tf.Session() as sess:
		model = M_IRN(FLAGS, multi_kb, sess)

		if FLAGS.resume:
			model.load()

		print("knowledge base 1 size", kb1_triples.shape[0])
		print("knowledge base 2 size", kb2_triples.shape[0])
		kg1_embedding_batches = create_batch(kb1_triples.shape[0], FLAGS.batch_size)
		kg2_embedding_batches = create_batch(kb2_triples.shape[0], FLAGS.batch_size)

		# Baseline scores before any training; they seed the "best" trackers.
		pre_v_preds = model.predict(valid_q, valid_p, v_batches)
		pre_t_preds = model.predict(test_q, test_p, t_batches)
		best_v_ep = -1
		best_v_pa, best_v_al, best_v_ast = multi_accuracy(valid_p, pre_v_preds, multi_kb, FLAGS.steps, FLAGS.hops, FLAGS.lan_labels)
		best_t_pa, best_t_al, best_t_ast = multi_accuracy(test_p, pre_t_preds, multi_kb, FLAGS.steps, FLAGS.hops, FLAGS.lan_labels)

		for t in range(1, FLAGS.r_epoch + 1):

			# Early stop: more than 50 epochs since the best validation epoch.
			if t - best_v_ep > 50:
				break

			start = time.time()
			np.random.shuffle(tr_batches)

			kg1_embedding_cost = kg2_embedding_cost = 0.0

			print("MIRN multi epoch {} training...".format(t))

			# e_epoch = 100 if t == 1 else FLAGS.e_epoch

			# Embedding training; only the totals of the last inner epoch
			# are kept for reporting.
			for i in range(1, FLAGS.e_epoch + 1):
				np.random.shuffle(kg1_embedding_batches)
				np.random.shuffle(kg2_embedding_batches)
				kg1_embedding_total_cost = 0.0
				kg2_embedding_total_cost = 0.0
				for s, e in kg1_embedding_batches:
					kg1_embedding_total_cost += model.batch_train_kg1_embedding(kb1_triples[s:e])
				kg1_embedding_cost = kg1_embedding_total_cost
				for s, e in kg2_embedding_batches:
					kg2_embedding_total_cost += model.batch_train_kg2_embedding(kb2_triples[s:e])
				kg2_embedding_cost = kg2_embedding_total_cost

			reasoning_total_cost = 0.0
			for s, e in tr_batches:
				reasoning_total_cost += model.batch_train_inference(train_q[s:e], train_p[s:e])

			tr_preds = model.predict(train_q, train_p, tr_batches_test)
			tr_pa, tr_al, tr_ast = multi_accuracy(train_p, tr_preds, multi_kb, FLAGS.steps, FLAGS.hops, FLAGS.lan_labels)
			v_preds = model.predict(valid_q, valid_p, v_batches)
			v_pa, v_al, v_ast = multi_accuracy(valid_p, v_preds, multi_kb, FLAGS.steps, FLAGS.hops, FLAGS.lan_labels)
			t_preds = model.predict(test_q, test_p, t_batches)
			t_pa, t_al, t_ast = multi_accuracy(test_p, t_preds, multi_kb, FLAGS.steps, FLAGS.hops, FLAGS.lan_labels)

			# Model selection on the validation score; the test scores of
			# that same epoch are remembered alongside it.
			if v_ast > best_v_ast:
				best_v_ep = t
				best_v_ast = v_ast
				best_v_pa = v_pa
				best_v_al = v_al
				best_t_ast = t_ast
				best_t_pa = t_pa
				best_t_al = t_al
				model.store()

			print('--------------------------------------------------------------------------------------------'
			      '--------------------------------------------------------------------------------------------')
			print('Epoch', t)
			print('Timing', (time.time() - start))
			print('Embedding total cost for KG1:', kg1_embedding_cost)
			print('Embedding total cost for KG2:', kg2_embedding_cost)
			print('Reasoning total cost:', reasoning_total_cost)
			# Report the training score tr_ast; this line previously printed
			# the test score t_ast by mistake.
			print('Training Accuracy:', tr_ast, tr_pa, tr_al)
			print('Validation Accuracy:', v_ast, v_pa, v_al)
			print('Test Accuracy:', t_ast, t_pa, t_al)
			print('Best Validation epoch & accuracy & path accu & alignment accu:', best_v_ep, best_v_ast, best_v_pa, best_v_al)
			print('Test accuracy under best Validation epoch:', best_t_ast, best_t_pa, best_t_al)
			print('--------------------------------------------------------------------------------------------'
			      '--------------------------------------------------------------------------------------------')