Example no. 1
def visualizeNetwork(sess, net, C):
    # -*- coding: utf-8 -*-
    # Get handle for vgg model
    vgg, images = data_loader.getVGGhandle()

    # Parse all the vqa question informations
    qa_data = data_loader.load_questions_answers(C.datapath)
    data_validation = qa_data['validation']
    data_training = qa_data['training']
    question_vocab = qa_data['question_vocab']
    answer_vocab = qa_data['answer_vocab']
    reverse_answer_vocab = data_loader.get_reverse_vocab(answer_vocab)
    reverse_quest_vocab = data_loader.get_reverse_vocab(question_vocab)

    train_data_path = os.path.join(C.image_base_path, 'train2014')
    val_data_path = os.path.join(C.image_base_path, 'val2014')
    train_data_generator = data_loader.getNextBatch(sess,
                                                    vgg,
                                                    images,
                                                    data_training,
                                                    question_vocab,
                                                    answer_vocab,
                                                    train_data_path,
                                                    batchSize=1,
                                                    purpose='train')
    valid_data_generator = data_loader.getNextBatch(sess,
                                                    vgg,
                                                    images,
                                                    data_validation,
                                                    question_vocab,
                                                    answer_vocab,
                                                    val_data_path,
                                                    batchSize=1,
                                                    purpose='val')

    save_path = '../vizQnA/'

    for i in range(C.max_visualize):
        batch_question, batch_answer, batch_image_id, batch_features = train_data_generator.next(
        )
        image_path = train_data_path

        image_save_dir = os.path.join(save_path, batch_image_id[0])
        utils.make_dir(image_save_dir)

        [predicted_prob, attn_map_t0, attn_map_t8, attn_map_t17, attn_map_t19,
         attn_map_t21] = sess.run(
             [net.ans_op_prob, net.attn_map_t0, net.attn_map_t8,
              net.attn_map_t17, net.attn_map_t19, net.attn_map_t21],
             feed_dict={net.qs_ip: batch_question,
                        net.cnn_ip: batch_features})

        [top_predicted_answer, predicted_answer_prob
         ] = utils.parse_predicted_probabilities(predicted_prob[0],
                                                 C.numAnswer)
        attn_map = [
            attn_map_t0[0], attn_map_t8[0], attn_map_t17[0], attn_map_t19[0],
            attn_map_t21[0]
        ]

        utils.process_results(top_predicted_answer, predicted_answer_prob, attn_map, image_path,
                              batch_question[0], batch_answer[0], batch_image_id[0], image_save_dir,
                              reverse_quest_vocab, reverse_answer_vocab, purpose='train')
Example no. 2
def preprocess_question():
    glove_source = 'data/glove.6B.50d.txt'
    glove_pkl = 'data/glove_6B_50.pkl'
    missing_pkl = 'data/missing_glove_ques.pkl'

    # Prepare Glove Dictionary
    if os.path.isfile(glove_pkl):
        print "Glove dictionary already exists!"
    else:
        if w2g.build_glove_dict(glove_source, glove_pkl) == 0:
            print "COMPLETED: Glove dictionary parsing"

    # Identify missing words in Glove dictionary
    if os.path.isfile(missing_pkl):
        print "Missing question vectors already processed!"
    else:
        # Load Glove Dictionary
        glove_dict = w2g.get_glove_dict(glove_pkl)

        # Load VQA training data
        vqa_data = dl.load_questions_answers('data')
        print "COMPLETED: VQA data retrieval"
        ques_vocab = vqa_data['question_vocab']
        if w2g.build_missing_w2g(ques_vocab, glove_dict, missing_pkl) == 0:
            print "COMPLETED Missing question words identification"
Example no. 3
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--num_lstm_layers',
                        type=int,
                        default=2,
                        help='num_lstm_layers')
    parser.add_argument('--fc7_feature_length',
                        type=int,
                        default=4096,
                        help='fc7_feature_length')
    parser.add_argument('--rnn_size', type=int, default=512, help='rnn_size')
    parser.add_argument('--embedding_size',
                        type=int,
                        default=512,
                        help='embedding_size')
    parser.add_argument('--word_emb_dropout',
                        type=float,
                        default=0.5,
                        help='word_emb_dropout')
    parser.add_argument('--image_dropout',
                        type=float,
                        default=0.5,
                        help='image_dropout')
    parser.add_argument('--data_dir',
                        type=str,
                        default='Data',
                        help='Data directory')
    parser.add_argument('--batch_size',
                        type=int,
                        default=200,
                        help='Batch Size')
    parser.add_argument('--learning_rate',
                        type=float,
                        default=0.001,
                        help='Learning Rate')
    parser.add_argument('--epochs', type=int, default=200, help='Epochs')
    parser.add_argument('--debug', type=bool, default=False, help='Debug')
    parser.add_argument('--resume_model',
                        type=str,
                        default=None,
                        help='Trained Model Path')

    args = parser.parse_args()
    print "Reading QA DATA"
    qa_data = data_loader.load_questions_answers(args)

    print "Reading fc7 features"
    fc7_features, image_id_list = data_loader.load_fc7_features(
        args.data_dir, 'train')
    print "FC7 features", fc7_features.shape
    print "image_id_list", image_id_list.shape

    image_id_map = {}
    for i in xrange(len(image_id_list)):
        image_id_map[image_id_list[i]] = i

    ans_map = {
        qa_data['answer_vocab'][ans]: ans
        for ans in qa_data['answer_vocab']
    }

    model_options = {
        'num_lstm_layers': args.num_lstm_layers,
        'rnn_size': args.rnn_size,
        'embedding_size': args.embedding_size,
        'word_emb_dropout': args.word_emb_dropout,
        'image_dropout': args.image_dropout,
        'fc7_feature_length': args.fc7_feature_length,
        'lstm_steps': qa_data['max_question_length'] + 1,
        'q_vocab_size': len(qa_data['question_vocab']),
        'ans_vocab_size': len(qa_data['answer_vocab'])
    }

    model = vis_lstm_model.Vis_lstm_model(model_options)
    input_tensors, t_loss, t_accuracy, t_p = model.build_model()
    train_op = tf.train.AdamOptimizer(args.learning_rate).minimize(t_loss)
    sess = tf.InteractiveSession()
    tf.initialize_all_variables().run()

    saver = tf.train.Saver()
    if args.resume_model:
        saver.restore(sess, args.resume_model)

    for i in xrange(args.epochs):
        batch_no = 0

        while (batch_no * args.batch_size) < len(qa_data['training']):
            sentence, answer, fc7 = get_training_batch(batch_no,
                                                       args.batch_size,
                                                       fc7_features,
                                                       image_id_map, qa_data,
                                                       'train')
            _, loss_value, accuracy, pred = sess.run(
                [train_op, t_loss, t_accuracy, t_p],
                feed_dict={
                    input_tensors['fc7']: fc7,
                    input_tensors['sentence']: sentence,
                    input_tensors['answer']: answer
                })
            batch_no += 1
            if args.debug:
                for idx, p in enumerate(pred):
                    print ans_map[p], ans_map[np.argmax(answer[idx])]

                print "Loss", loss_value, batch_no, i
                print "Accuracy", accuracy
                print "---------------"
            else:
                print "Loss", loss_value, batch_no, i
                print "Training Accuracy", accuracy

        save_path = saver.save(sess, "Data/Models/model{}.ckpt".format(i))
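
get_training_batch is called by several of these training scripts but defined in none of them. A minimal sketch under assumed conventions (each entry of qa_data['training'] holds 'question' word indices, an 'answer' vocabulary index and an 'image_id'; answers are one-hot against answer_vocab); the real helper may differ:

import numpy as np

def get_training_batch_sketch(batch_no, batch_size, fc7_features, image_id_map, qa_data, split):
    qa = qa_data['training'] if split == 'train' else qa_data['validation']
    batch = qa[batch_no * batch_size:(batch_no + 1) * batch_size]
    max_q = qa_data['max_question_length']
    sentence = np.zeros((len(batch), max_q), dtype='int32')
    answer = np.zeros((len(batch), len(qa_data['answer_vocab'])), dtype='float32')
    fc7 = np.zeros((len(batch), 4096), dtype='float32')
    for i, entry in enumerate(batch):
        qlen = min(len(entry['question']), max_q)
        sentence[i, :qlen] = entry['question'][:qlen]   # assumed: list of word indices
        answer[i, entry['answer']] = 1.0                # assumed: answer-vocab index
        fc7[i] = fc7_features[image_id_map[entry['image_id']]]
    return sentence, answer, fc7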
Example no. 4
def main():
    config = json.load(open('config.json'))

    parser = argparse.ArgumentParser()
    parser.add_argument('--num_lstm_layers', type=int, default=2,
                        help='num_lstm_layers')
    parser.add_argument('--fc7_feature_length', type=int, default=4096,
                        help='fc7_feature_length')
    parser.add_argument('--rnn_size', type=int, default=512,
                        help='rnn_size')
    parser.add_argument('--embedding_size', type=int, default=512,
                        help='embedding_size')
    parser.add_argument('--word_emb_dropout', type=float, default=0.5,
                        help='word_emb_dropout')
    parser.add_argument('--image_dropout', type=float, default=0.5,
                        help='image_dropout')
    parser.add_argument('--qa_dir', type=str, default=config['qa_dir'],
                        help='QA Data directory')
    parser.add_argument('--data_dir', type=str, default=config['data_dir'],
                        help='Common Data directory')
    parser.add_argument('--batch_size', type=int, default=200,
                        help='Batch Size')
    parser.add_argument('--learning_rate', type=float, default=0.001,
                        help='Learning Rate')
    parser.add_argument('--epochs', type=int, default=2,
                        help='Epochs')
    parser.add_argument('--debug', type=bool, default=False,
                        help='Debug')
    parser.add_argument('--resume_model', type=str, default=None,
                        help='Trained Model Path')
    parser.add_argument('--version', type=int, default=1,
                        help='VQA data version')

    args = parser.parse_args()
    print("Reading QA DATA")
    qa_data = data_loader.load_questions_answers(args.qa_dir)

    print("Reading fc7 features")
    fc7_features, image_id_list = data_loader.load_fc7_features(args.data_dir, 'train')
    print("FC7 features", fc7_features.shape)
    print("image_id_list", image_id_list.shape)

    image_id_map = {}
    for i in range(len(image_id_list)):
        image_id_map[image_id_list[i]] = i

    ans_map = {qa_data['answer_vocab'][ans]: ans for ans in qa_data['answer_vocab']}

    model_options = {
        'num_lstm_layers': args.num_lstm_layers,
        'rnn_size': args.rnn_size,
        'embedding_size': args.embedding_size,
        'word_emb_dropout': args.word_emb_dropout,
        'image_dropout': args.image_dropout,
        'fc7_feature_length': args.fc7_feature_length,
        'lstm_steps': qa_data['max_question_length'] + 1,
        'q_vocab_size': len(qa_data['question_vocab']),
        'ans_vocab_size': len(qa_data['answer_vocab'])
    }

    model = vis_lstm_model.Vis_lstm_model(model_options)
    input_tensors, t_loss, t_accuracy, t_p = model.build_model()
    train_op = tf.train.AdamOptimizer(args.learning_rate).minimize(t_loss)
    sess = tf.InteractiveSession()
    # tf.initialize_all_variables().run()  # tf.initialize_all_variables() is deprecated since 2017-03-02
    sess.run(tf.global_variables_initializer())

    saver = tf.train.Saver()
    if args.resume_model:
        saver.restore(sess, args.resume_model)

    acc_file = open('train_acc.txt', 'w', encoding='utf-8')
    acc_file.write('epoch   avg_acc\n')
    for i in range(args.epochs):
        batch_no = 0
        epochs_acc_sum = 0
        while (batch_no * args.batch_size) < len(qa_data['training']):
            sentence, answer, fc7 = get_training_batch(batch_no, args.batch_size, fc7_features, image_id_map, qa_data,
                                                       'train')
            _, loss_value, accuracy, pred = sess.run([train_op, t_loss, t_accuracy, t_p],
                                                     feed_dict={
                                                         input_tensors['fc7']: fc7,
                                                         input_tensors['sentence']: sentence,
                                                         input_tensors['answer']: answer
                                                     }
                                                     )
            batch_no += 1
            if args.debug:
                for idx, p in enumerate(pred):
                    print(ans_map[p], ans_map[np.argmax(answer[idx])])

                print("Loss", loss_value, batch_no, i)
                print("Accuracy", accuracy)
                print("---------------")
                epochs_acc_sum += accuracy
            else:
                print("Loss", loss_value, batch_no, i)
                print("Training Accuracy", accuracy)
                epochs_acc_sum += accuracy
        acc_file.write(str(i) + '   ' + str(epochs_acc_sum/batch_no) + '\n')
        print()
        save_path = saver.save(sess, "Data/Models/model{}.ckpt".format(i))

    acc_file.close()
Example no. 5
def main():
	parser = argparse.ArgumentParser()
	parser.add_argument('--num_lstm_layers', type=int, default=2,
                       help='num_lstm_layers')
	parser.add_argument('--fc7_feature_length', type=int, default=4096,
                       help='fc7_feature_length')
	parser.add_argument('--rnn_size', type=int, default=512,
                       help='rnn_size')
	parser.add_argument('--embedding_size', type=int, default=512,
                       help='embedding_size')
	parser.add_argument('--word_emb_dropout', type=float, default=0.5,
                       help='word_emb_dropout')
	parser.add_argument('--image_dropout', type=float, default=0.5,
                       help='image_dropout')
	parser.add_argument('--data_dir', type=str, default='Data',
                       help='Data directory')
	parser.add_argument('--batch_size', type=int, default=200,
                       help='Batch Size')
	parser.add_argument('--learning_rate', type=float, default=0.001,
                       help='Learning Rate')
	parser.add_argument('--epochs', type=int, default=200,
                       help='Epochs')
	parser.add_argument('--debug', type=bool, default=False,
                       help='Debug')
	parser.add_argument('--resume_model', type=str, default=None,
                       help='Trained Model Path')

	args = parser.parse_args()
	print "Reading QA DATA"
	qa_data = data_loader.load_questions_answers(args)
	
	print "Reading fc7 features"
	fc7_features, image_id_list = data_loader.load_fc7_features(args.data_dir, 'train')
	print "FC7 features", fc7_features.shape
	print "image_id_list", image_id_list.shape

	image_id_map = {}
	for i in xrange(len(image_id_list)):
		image_id_map[ image_id_list[i] ] = i

	ans_map = { qa_data['answer_vocab'][ans] : ans for ans in qa_data['answer_vocab']}

	model_options = {
		'num_lstm_layers' : args.num_lstm_layers,
		'rnn_size' : args.rnn_size,
		'embedding_size' : args.embedding_size,
		'word_emb_dropout' : args.word_emb_dropout,
		'image_dropout' : args.image_dropout,
		'fc7_feature_length' : args.fc7_feature_length,
		'lstm_steps' : qa_data['max_question_length'] + 1,
		'q_vocab_size' : len(qa_data['question_vocab']),
		'ans_vocab_size' : len(qa_data['answer_vocab'])
	}
	
	
	
	model = vis_lstm_model.Vis_lstm_model(model_options)
	input_tensors, t_loss, t_accuracy, t_p = model.build_model()
	train_op = tf.train.AdamOptimizer(args.learning_rate).minimize(t_loss)
	sess = tf.InteractiveSession()
	tf.initialize_all_variables().run()

	
	saver = tf.train.Saver()
	if args.resume_model:
		saver.restore(sess, args.resume_model)

	for i in xrange(args.epochs):
		batch_no = 0

		while (batch_no*args.batch_size) < len(qa_data['training']):
			sentence, answer, fc7 = get_training_batch(batch_no, args.batch_size, fc7_features, image_id_map, qa_data, 'train')
			_, loss_value, accuracy, pred = sess.run([train_op, t_loss, t_accuracy, t_p], 
				feed_dict={
					input_tensors['fc7']:fc7,
					input_tensors['sentence']:sentence,
					input_tensors['answer']:answer
				}
			)
			batch_no += 1
			if args.debug:
				for idx, p in enumerate(pred):
					print ans_map[p], ans_map[ np.argmax(answer[idx])]

				print "Loss", loss_value, batch_no, i
				print "Accuracy", accuracy
				print "---------------"
			else:
				print "Loss", loss_value, batch_no, i
				print "Training Accuracy", accuracy
			
		save_path = saver.save(sess, "Data/Models/model{}.ckpt".format(i))
Example no. 6
def main():
    config = json.load(open('config.json'))

    parser = argparse.ArgumentParser()
    parser.add_argument('--split',
                        type=str,
                        default=config['split'],
                        help='train/val')
    parser.add_argument('--model_path',
                        type=str,
                        default=config['model_path'],
                        help='Pretrained VGG16 Model')
    parser.add_argument('--qa_dir',
                        type=str,
                        default=config['qa_dir'],
                        help='QA Data directory')
    parser.add_argument('--data_dir',
                        type=str,
                        default=config['data_dir'],
                        help='Common Data directory')
    parser.add_argument('--batch_size',
                        type=int,
                        default=10,
                        help='Batch Size')

    args = parser.parse_args()

    vgg_file = open(args.model_path, 'rb')
    vgg16raw = vgg_file.read()
    vgg_file.close()

    graph_def = tf.GraphDef()
    graph_def.ParseFromString(vgg16raw)

    images = tf.placeholder("float", [None, 224, 224, 3])
    tf.import_graph_def(graph_def, input_map={"images": images})

    graph = tf.get_default_graph()

    for opn in graph.get_operations():
        print("Name", opn.name, list(opn.values()))

    all_data = data_loader.load_questions_answers(args.qa_dir)
    if args.split == "train":
        qa_data = all_data['training']
    else:
        qa_data = all_data['validation']

    image_ids = {}
    for qa in qa_data:
        image_ids[qa['image_id']] = 1

    image_id_list = [img_id for img_id in image_ids]
    print("Total Images", len(image_id_list))

    sess = tf.Session()
    fc7 = np.ndarray((len(image_id_list), 4096))
    idx = 0

    err_file = open('err.txt', 'w', encoding='utf-8')

    while idx < len(image_id_list):
        start = time.clock()
        image_batch = np.ndarray((args.batch_size, 224, 224, 3))

        count = 0
        for i in range(0, args.batch_size):
            if idx >= len(image_id_list):
                break
            # print(image_id_list[idx])
            filename = 'COCO_%s2014_%.12d.jpg' % (args.split,
                                                  image_id_list[idx])
            image_file = join(args.data_dir, '%s2014' % args.split, filename)
            try:
                image_batch[i, :, :, :] = utils.load_image_array(image_file)
            except (ValueError, FileNotFoundError, OSError) as e:
                print("http://images.cocodataset.org/%s2014/%s" %
                      (args.split, filename))
                err_file.write(str(image_id_list[idx]) + '\n')
            idx += 1
            count += 1
        err_file.flush()
        feed_dict = {images: image_batch[0:count, :, :, :]}
        fc7_tensor = graph.get_tensor_by_name("import/Relu_1:0")
        fc7_batch = sess.run(fc7_tensor, feed_dict=feed_dict)
        fc7[(idx - count):idx, :] = fc7_batch[0:count, :]
        end = time.clock()
        print("Time for batch 10 photos", end - start)
        print("Hours For Whole Dataset",
              (len(image_id_list) * 1.0) * (end - start) / 60.0 / 60.0 / 10.0)

        print("Images Processed", idx)

    print("Saving fc7 features")
    h5f_fc7 = h5py.File(join(args.data_dir, args.split + '_fc7.h5'), 'w')
    h5f_fc7.create_dataset('fc7_features', data=fc7)
    h5f_fc7.close()

    print("Saving image id list")
    h5f_image_id_list = h5py.File(
        join(args.data_dir, args.split + '_image_id_list.h5'), 'w')
    h5f_image_id_list.create_dataset('image_id_list', data=image_id_list)
    h5f_image_id_list.close()
    print("Done!")
Example no. 7
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--split', type=str, default='train', help='train/val')
    parser.add_argument('--model_path', type=str, help='VGGNet')
    #VGGNet version
    parser.add_argument('--data_dir',
                        type=str,
                        default='Data',
                        help='Data directory')
    parser.add_argument('--batch_size', type=int, default=100)

    # read pretrained vgg16 network
    args = parser.parse_args()
    vgg_file = open(args.model_path, 'rb')
    vgg16raw = vgg_file.read()
    vgg_file.close()

    # load the pretrained network into a tf graph
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(vgg16raw)

    images = tf.placeholder("float", [None, 224, 224, 3])
    tf.import_graph_def(graph_def, input_map={"images": images})

    graph = tf.get_default_graph()

    for opn in graph.get_operations():
        print("[VGG16] Name", opn.name, list(opn.values()))

    #Loading data
    all_data = data_loader.load_questions_answers()
    print(args)
    if args.split == "train":
        qa_data = all_data['training']
    else:
        qa_data = all_data['validation']

    image_ids = {}
    for qa in qa_data:
        image_ids[qa['image_id']] = 1

    image_id_list = [img_id for img_id in image_ids]
    print("Total Images", len(image_id_list))
    print(image_id_list[0:10])

    # begin extracting
    sess = tf.Session()
    idx = 0

    cnn7 = np.ndarray((len(image_id_list), 512, 49))
    while idx < len(image_id_list):
        start = time.clock()
        image_batch = np.ndarray((args.batch_size, 224, 224, 3))

        # load images into a batch
        count = 0
        for i in range(0, args.batch_size):
            if idx >= len(image_id_list):
                break
            image_file = join(
                args.data_dir, '%s2015/abstract_v002_%s2015_%.12d.png' %
                (args.split, args.split, image_id_list[idx]))
            image_batch[i, :, :, :] = utils.load_image_array(
                image_file)[:, :, :3]
            idx += 1
            count += 1

        feed_dict = {images: image_batch[0:count, :, :, :]}
        cnn7_tensor = graph.get_tensor_by_name("import/pool5:0")
        cnn7_batch = sess.run(cnn7_tensor, feed_dict=feed_dict)
        cnn7_batch = np.transpose(cnn7_batch, [0, 3, 1, 2])
        cnn7_batch = cnn7_batch.reshape(count, 512, -1)
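        # pool5 of VGG16 gives a 7x7x512 map per 224x224 image; after the
        # transpose and reshape each image is a (512, 49) matrix, and the loop
        # below L2-normalizes the 512-d channel vector at each spatial position.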
        for i in range(count):
            cnn7_batch[i, :, :] = cnn7_batch[i, :, :] / np.linalg.norm(
                cnn7_batch[i, :, :], axis=0, keepdims=True)

        cnn7[(idx - count):idx, ...] = cnn7_batch[0:count, ...]
        end = time.clock()
        print("Time for batch of %d photos" % count, end - start)
        print("Hours For Whole Dataset",
              (len(image_id_list) * 1.0) * (end - start) / 60.0 / 60.0 /
              args.batch_size)
        print("Images Processed", idx)

    print("Saving cnn7 features")
    h5f_cnn7 = h5py.File(join(args.data_dir, args.split + '_cnn7.h5'), 'w')
    h5f_cnn7.create_dataset('cnn7_features', data=cnn7)
    h5f_cnn7.close()

    print("Saving image id list")
    h5f_image_id_list = h5py.File(
        join(args.data_dir, args.split + '_image_id_list.h5'), 'w')
    h5f_image_id_list.create_dataset('image_id_list', data=image_id_list)
    h5f_image_id_list.close()
    print("Done!")
Example no. 8
def train():
    batch_size = 10
    print "Starting ABC-CNN training"
    vqa = dl.load_questions_answers('data')

    # Create subset of data for over-fitting
    sub_vqa = {}
    sub_vqa['training'] = vqa['training'][:10]
    sub_vqa['validation'] = vqa['validation'][:10]
    sub_vqa['answer_vocab'] = vqa['answer_vocab']
    sub_vqa['question_vocab'] = vqa['question_vocab']
    sub_vqa['max_question_length'] = vqa['max_question_length']
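    # Note: sub_vqa is prepared for quick over-fitting sanity checks, but the
    # batch generators below are built from the full vqa dict, so it is unused.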

    train_size = len(vqa['training'])
    max_itr = (train_size // batch_size) * 10

    with tf.Session() as sess:
        image, ques, ans, optimizer, loss, accuracy = abc.model(
            sess, batch_size)
        print "Defined ABC model"

        train_loader = util.get_batch(sess, vqa, batch_size, 'training')
        print "Created train dataset generator"

        valid_loader = util.get_batch(sess, vqa, batch_size, 'validation')
        print "Created validation dataset generator"

        writer = abc.write_tensorboard(sess)
        init = tf.global_variables_initializer()
        merged = tf.summary.merge_all()
        sess.run(init)
        print "Initialized Tensor variables"

        itr = 1

        while itr < max_itr:
            run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
            run_metadata = tf.RunMetadata()

            _, vgg_batch, ques_batch, answer_batch = train_loader.next()
            _, valid_vgg_batch, valid_ques_batch, valid_answer_batch = valid_loader.next(
            )
            sess.run(optimizer,
                     feed_dict={
                         image: vgg_batch,
                         ques: ques_batch,
                         ans: answer_batch
                     })
            [train_summary, train_loss,
             train_accuracy] = sess.run([merged, loss, accuracy],
                                        feed_dict={
                                            image: vgg_batch,
                                            ques: ques_batch,
                                            ans: answer_batch
                                        },
                                        options=run_options,
                                        run_metadata=run_metadata)
            [valid_loss, valid_accuracy] = sess.run(
                [loss, accuracy],
                feed_dict={
                    image: valid_vgg_batch,
                    ques: valid_ques_batch,
                    ans: valid_answer_batch
                })

            writer.add_run_metadata(run_metadata, 'step%03d' % itr)
            writer.add_summary(train_summary, itr)
            writer.flush()
            print "Iteration:%d\tTraining Loss:%f\tTraining Accuracy:%f\tValidation Loss:%f\tValidation Accuracy:%f" % (
                itr, train_loss, 100. * train_accuracy, valid_loss,
                100. * valid_accuracy)
            itr += 1
Example no. 9
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--split',
                        type=str,
                        default='train',
                        help='train/val/test')
    parser.add_argument('--model_path',
                        type=str,
                        default='./Data/ResNet/resnet_v2_101.ckpt',
                        help='Pretrained RESNET Model')
    parser.add_argument('--data_dir',
                        type=str,
                        default='Data',
                        help='Data directory')
    parser.add_argument('--batch_size',
                        type=int,
                        default=10,
                        help='Batch Size')

    args = parser.parse_args()

    slim = tf.contrib.slim
    resnet = nets.resnet_v2

    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.9
    sess = tf.InteractiveSession(config=config)
    sess.run(tf.global_variables_initializer())

    if args.split == 'test':
        all_data = data_loader.load_test_questions()
        qa_data = all_data['testing']
    else:
        all_data = data_loader.load_questions_answers(args)
        if args.split == "train":
            qa_data = all_data['training']
        else:
            qa_data = all_data['validation']

    image_ids = {}
    for qa in qa_data:
        image_ids[qa['image_id']] = 1

    image_id_list = [img_id for img_id in image_ids]
    print("Total Images", len(image_id_list))

    length = 100 if args.split == 'test' else len(image_id_list)
    res5c = np.ndarray((length, 2048))
    idx = 0
    SIZE = 299
    flag = 0
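    # The ResNet is re-instantiated inside the batch loop, so the default graph
    # keeps growing; the inner loop therefore breaks every 500 images so the
    # graph can be rebuilt and the checkpoint restored again (flag == 0).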

    while idx < length:
        if idx % 500 == 0:
            flag = 0
            tf.reset_default_graph()
        with tf.Graph().as_default():
            with tf.Session() as sess:
                while idx < length:
                    start = time.clock()
                    image_batch = np.ndarray((args.batch_size, SIZE, SIZE, 3),
                                             dtype=np.float32)

                    count = 0
                    for i in range(0, args.batch_size):
                        if idx >= len(image_id_list):
                            break
                        if args.split == 'test':
                            image_file = join(
                                args.data_dir, '%s2015/COCO_%s2015_%.12d.jpg' %
                                (args.split, args.split, image_id_list[idx]))
                        else:
                            image_file = join(
                                args.data_dir, '%s2014/COCO_%s2014_%.12d.jpg' %
                                (args.split, args.split, image_id_list[idx]))
                        image_batch[i, :, :, :] = utils.load_image_array(
                            image_file, size=SIZE)
                        idx += 1
                        count += 1

                    with slim.arg_scope(resnet.resnet_arg_scope()):
                        logits, end_points = resnet.resnet_v2_101(
                            image_batch[0:count, :, :, :],
                            num_classes=None,
                            is_training=False,
                            reuse=tf.AUTO_REUSE)
                        if not flag:
                            vals = slim.get_model_variables('resnet_v2_101')
                            init_fn = slim.assign_from_checkpoint_fn(
                                args.model_path, vals)
                            init_fn(sess)
                            flag = 1

                    res5c_batch = sess.run([logits])
                    # With num_classes=None the ResNet output has shape
                    # (count, 1, 1, 2048), so reshape by the actual batch count
                    res5c_batch = res5c_batch[0].reshape((count, 2048))
                    res5c[(idx - count):idx, :] = res5c_batch[0:count, :]
                    end = time.clock()
                    print("Time for batch 10 photos", end - start)
                    print("Hours For Whole Dataset",
                          (len(image_id_list) * 1.0) * (end - start) / 60.0 /
                          60.0 / 10.0)

                    print("Images Processed", idx)
                    if idx % 500 == 0:
                        break

    print("Saving res5c features")
    h5f_res5c = h5py.File(join(args.data_dir, args.split + '_res5c.h5'), 'w')
    h5f_res5c.create_dataset('res5c_features', data=res5c)
    h5f_res5c.close()

    print("Saving image id list")
    h5f_image_id_list = h5py.File(
        join(args.data_dir, args.split + '_image_id_list.h5'), 'w')
    h5f_image_id_list.create_dataset('image_id_list', data=image_id_list)
    h5f_image_id_list.close()
    print("Done!")
Example no. 10
                    help='network type iBOWIMG|HieCoAtten')
parser.add_argument('--use_soft',
                    action='store_true',
                    default=False,
                    help='using soft cross entropy')
args = parser.parse_args()

data_dir = args.data_dir
batch_size = args.batch_size
num_epochs = args.num_epochs
network = args.network
use_soft = args.use_soft

# Load QA Data
print("Reading QA DATA")
qa_data = load_questions_answers(data_dir=data_dir)
print("train questions", len(qa_data['train']))
print("val questions", len(qa_data['val']))
print("answer vocab", len(qa_data['answer_vocab']))
print("question vocab", len(qa_data['question_vocab']))
print("max question length", qa_data['max_question_length'])

# Define Data Loader
data_splits = ('train', 'val')
pdb.set_trace()
if network == 'iBOWIMG':
    feature_type = 'vgg19Fc'
elif network == 'HieCoAtten':
    feature_type = 'vgg19TwoBlocks'

datasets = {
Example no. 11
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--num_lstm_layers', type=int, default=2,
                       help='num_lstm_layers')
    parser.add_argument('--cnn7_feature_length', type=int, default=512,
                       help='cnn7_feature_length')
    parser.add_argument('--rnn_size', type=int, default=512,
                       help='rnn_size')
    # Defaults below mirror the other training scripts in this collection.
    parser.add_argument('--embedding_size', type=int, default=512)
    parser.add_argument('--word_emb_dropout', type=float, default=0.5)
    parser.add_argument('--image_dropout', type=float, default=0.5)
    parser.add_argument('--data_dir', type=str, default='Data')
    parser.add_argument('--batch_size', type=int, default=100,
                       help='Batch Size')
    parser.add_argument('--learning_rate', type=float, default=0.1,
                       help='Learning Rate')
    parser.add_argument('--epochs', type=int, default=400,
                       help='Epochs')
    parser.add_argument('--debug', type=bool, default=False,
                       help='Debug')
    parser.add_argument('--resume_model', type=str, default=None,
                       help='Trained Model Path')
    parser.add_argument('--version', type=int, default=2,
                       help='VQA data version')

    args = parser.parse_args()
    print("Reading QA DATA")
    qa_data = data_loader.load_questions_answers(args.version, args.data_dir)
    
    print("Reading cnn7 features")
    cnn7_features, image_id_list = data_loader.load_cnn7_features(args.data_dir, 'train')
    print("cnn7 features", cnn7_features.shape)
    print("image_id_list", image_id_list.shape)

    image_id_map = {}
    for i in range(len(image_id_list)):
        image_id_map[ image_id_list[i] ] = i

    ans_map = { qa_data['answer_vocab'][ans] : ans for ans in qa_data['answer_vocab']}

    model_options = {
        'num_lstm_layers' : args.num_lstm_layers,
        'rnn_size' : args.rnn_size,
        'embedding_size' : args.embedding_size,
        'word_emb_dropout' : args.word_emb_dropout,
        'image_dropout' : args.image_dropout,
        'cnn7_feature_length' : args.cnn7_feature_length,
        'lstm_steps' : qa_data['max_question_length'] + 1,
        'q_vocab_size' : len(qa_data['question_vocab']),
        'ans_vocab_size' : len(qa_data['answer_vocab'])
    }
    
    
    
    model = vis_lstm_model.Vis_lstm_model(model_options)
    input_tensors, t_loss, t_accuracy, t_p = model.build_model()
    train_op = tf.train.AdamOptimizer(args.learning_rate).minimize(t_loss)
    sess = tf.InteractiveSession()
    tf.initialize_all_variables().run()

    saver = tf.train.Saver()
    if args.resume_model:
        saver.restore(sess, args.resume_model)
        last_epoch = int(args.resume_model[-7:-5])
        print(f'I resume Epoch {last_epoch}')
    else:
        last_epoch = -1
        
    for i in range(args.epochs):
        batch_no = 0
        batch_acc_record = []

        while batch_no < 220:
            start = time.clock()

            sentence, answer, cnn7 = get_training_batch(batch_no, args.batch_size, cnn7_features, image_id_map, qa_data, 'train')
            _, loss_value, accuracy, pred = sess.run([train_op, t_loss, t_accuracy, t_p], 
                feed_dict={
                    input_tensors['cnn7']:cnn7,
                    input_tensors['sentence']:sentence,
                    input_tensors['answer']:answer
                }
            )
            batch_acc_record.append(accuracy)
            batch_no += 1
            if args.debug:
                for idx, p in enumerate(pred):
                    print(ans_map[p], ans_map[ np.argmax(answer[idx])])

                print("Loss", loss_value, batch_no, i + 1 + last_epoch)
                print("Accuracy", accuracy)
                print("---------------")
            else:
                print("Loss", loss_value, batch_no, i + 1 + last_epoch)
                print("Training Accuracy", accuracy)

            end = time.clock()
            print("Time for one batch", end - start)
            print("Hours For one epoch" , (291 * 1.0)*(end - start)/60.0/60.0)
            
        save_path = saver.save(sess, "Data/Models/model{}.ckpt".format(i + 1 + last_epoch))

        if np.mean(batch_acc_record)>=0.9:
            break
            
    sess.close()
Example no. 12
def main():
	parser = argparse.ArgumentParser()
	parser.add_argument('--split', type=str, default='train',
                       help='train/val')
	parser.add_argument('--model_path', type=str, default='Data/vgg16.tfmodel',
                       help='Pretrained VGG16 Model')
	parser.add_argument('--data_dir', type=str, default='Data',
                       help='Data directory')
	parser.add_argument('--batch_size', type=int, default=10,
                       help='Batch Size')
	


	args = parser.parse_args()
	
	vgg_file = open(args.model_path, 'rb')
	vgg16raw = vgg_file.read()
	vgg_file.close()

	graph_def = tf.GraphDef()
	graph_def.ParseFromString(vgg16raw)

	images = tf.placeholder("float", [None, 224, 224, 3])
	tf.import_graph_def(graph_def, input_map={ "images": images })

	graph = tf.get_default_graph()

	for opn in graph.get_operations():
		print "Name", opn.name, opn.values()

	all_data = data_loader.load_questions_answers(args)
	if args.split == "train":
		qa_data = all_data['training']
	else:
		qa_data = all_data['validation']
	
	image_ids = {}
	for qa in qa_data:
		image_ids[qa['image_id']] = 1

	image_id_list = [img_id for img_id in image_ids]
	print "Total Images", len(image_id_list)
	
	
	sess = tf.Session()
	fc7 = np.ndarray( (len(image_id_list), 4096 ) )
	idx = 0

	while idx < len(image_id_list):
		start = time.clock()
		image_batch = np.ndarray( (args.batch_size, 224, 224, 3 ) )

		count = 0
		for i in range(0, args.batch_size):
			if idx >= len(image_id_list):
				break
			image_file = join(args.data_dir, '%s2014/COCO_%s2014_%.12d.jpg'%(args.split, args.split, image_id_list[idx]) )
			image_batch[i,:,:,:] = utils.load_image_array(image_file)
			idx += 1
			count += 1
		
		
		feed_dict  = { images : image_batch[0:count,:,:,:] }
		fc7_tensor = graph.get_tensor_by_name("import/Relu_1:0")
		fc7_batch = sess.run(fc7_tensor, feed_dict = feed_dict)
		fc7[(idx - count):idx, :] = fc7_batch[0:count,:]
		end = time.clock()
		print "Time for batch 10 photos", end - start
		print "Hours For Whole Dataset" , (len(image_id_list) * 1.0)*(end - start)/60.0/60.0/10.0

		print "Images Processed", idx

		

	print "Saving fc7 features"
	h5f_fc7 = h5py.File( join(args.data_dir, args.split + '_fc7.h5'), 'w')
	h5f_fc7.create_dataset('fc7_features', data=fc7)
	h5f_fc7.close()

	print "Saving image id list"
	h5f_image_id_list = h5py.File( join(args.data_dir, args.split + '_image_id_list.h5'), 'w')
	h5f_image_id_list.create_dataset('image_id_list', data=image_id_list)
	h5f_image_id_list.close()
	print "Done!"
Example no. 13
def main():
    # Set arguments to get file from directories
    parser = argparse.ArgumentParser()
    parser.add_argument('--split', type=str, default='train', help='train/val')
    parser.add_argument(
        '--model_path',
        type=str,
        default='/home/vmhatre/vqa_supervised/Data/vgg16.tfmodel')
    parser.add_argument('--data_dir',
                        type=str,
                        default='/home/vmhatre/vqa_supervised/Data')
    parser.add_argument('--batch_size', type=int, default=10)

    args = parser.parse_args()

    vgg_file = open(args.model_path, 'rb')
    vgg16raw = vgg_file.read()
    vgg_file.close()
    # Parse the serialized VGG16 GraphDef so it can be imported into this graph

    graph_def = tf.GraphDef()
    graph_def.ParseFromString(vgg16raw)
    # Image input placeholder (512x512x3 here; the other scripts in this
    # collection feed 224x224 images to the same VGG16 graph)
    images = tf.placeholder("float", [None, 512, 512, 3])
    tf.import_graph_def(graph_def, input_map={"images": images})

    # Inspect the imported graph: print every operation name and its outputs
    graph = tf.get_default_graph()
    for opn in graph.get_operations():
        print "Name", opn.name, opn.values()

    # Load the question/answer data and pick the training or validation split
    all_data = data_loader.load_questions_answers(args)
    if args.split == "train":
        qa_data = all_data['training']
    else:
        qa_data = all_data['validation']
    # Collect the unique image ids referenced by the QA pairs
    image_ids = {}
    for qa in qa_data:
        image_ids[qa['image_id']] = 1

    image_id_list = [img_id for img_id in image_ids]
    #print "Total Images", len(image_id_list)

    sess = tf.Session()
    fc7 = np.ndarray((len(image_id_list), 4096))
    idx = 0
    # Extract fc7 features batch by batch
    while idx < len(image_id_list):
        image_batch = np.ndarray((args.batch_size, 512, 512, 3))
        # Load each image of the batch into image_batch with load_image_array
        count = 0
        for i in range(0, args.batch_size):
            if idx >= len(image_id_list):
                break
            image_file = join(
                args.data_dir, '%s2014/COCO_%s2014_%.12d.jpg' %
                (args.split, args.split, image_id_list[idx]))
            image_batch[i, :, :, :] = utils.load_image_array(image_file)
            idx += 1
            count += 1

        feed_dict = {images: image_batch[0:count, :, :, :]}
        # Fetch the fc7 activations of the imported VGG16 graph (op "import/Relu_1")
        fc7_tensor = graph.get_tensor_by_name("import/Relu_1:0")
        fc7_batch = sess.run(fc7_tensor, feed_dict=feed_dict)
        fc7[(idx - count):idx, :] = fc7_batch[0:count, :]


    # Save the extracted fc7 features to HDF5
    h5f_fc7 = h5py.File(join(args.data_dir, args.split + '_fc7.h5'), 'w')
    h5f_fc7.create_dataset('fc7_features', data=fc7)
    h5f_fc7.close()

    print "Saving image id list"
    h5f_image_id_list = h5py.File(
        join(args.data_dir, args.split + '_image_id_list.h5'), 'w')
    h5f_image_id_list.create_dataset('image_id_list', data=image_id_list)
    h5f_image_id_list.close()
Example no. 14
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument('--residual_channels',
                        type=int,
                        default=512,
                        help='residual_channels')
    parser.add_argument('--data_dir',
                        type=str,
                        default='Data',
                        help='Data directory')
    parser.add_argument('--batch_size',
                        type=int,
                        default=64,
                        help='Batch Size')
    parser.add_argument('--learning_rate',
                        type=float,
                        default=0.001,
                        help='Learning Rate')
    parser.add_argument('--epochs', type=int, default=25, help='Epochs')
    parser.add_argument('--max_steps',
                        type=int,
                        default=50000,
                        help='max steps, set 1 for evaluating the model')
    parser.add_argument('--version',
                        type=int,
                        default=1,
                        help='VQA data version')
    parser.add_argument('--sample_every',
                        type=int,
                        default=200,
                        help='Debug every x iterations')
    parser.add_argument('--evaluate_every',
                        type=int,
                        default=6000,
                        help='Evaluate every x steps')
    parser.add_argument('--resume_model',
                        type=str,
                        default=None,
                        help='Trained Model Path')
    parser.add_argument('--training_log_file',
                        type=str,
                        default='Data/training_log.json',
                        help='Log file for accuracy')
    parser.add_argument('--feature_layer',
                        type=str,
                        default="block4",
                        help='CONV FEATURE LAYER, fc7, pool5 or block4')
    parser.add_argument('--cnn_model',
                        type=str,
                        default="resnet",
                        help='CNN model')
    parser.add_argument('--text_model',
                        type=str,
                        default="bytenet",
                        help='bytenet/lstm')

    # evaluation_steps = [6000, 12000, 18000, 25000, 30000, 35000, 50000]
    # evaluation_steps = [400, 800, 1200, 1600, 2000, 2400, 2800]
    args = parser.parse_args()

    print "Reading QA DATA", args.version
    qa_data = data_loader.load_questions_answers(args.version, args.data_dir)
    shuffle(qa_data['training'])
    shuffle(qa_data['validation'])

    ans_vocab_rev = qa_data['index_to_ans']
    ques_vocab_rev = qa_data['index_to_qw']

    print "Reading conv features"
    conv_features, image_id_list = data_loader.load_conv_features(
        'train', args.cnn_model, args.feature_layer)
    # image_id_map = {image_id_list[i] : i for i in xrange(len(image_id_list))}
    image_id_map = {image_id_list[i]: i for i in xrange(len(image_id_list))}

    conv_features_val, image_id_list_val = data_loader.load_conv_features(
        'val', args.cnn_model, args.feature_layer)
    image_id_map_val = {
        image_id_list_val[i]: i
        for i in xrange(len(image_id_list_val))
    }

    conv_features = data_loader.load_conv_features('train',
                                                   args.cnn_model,
                                                   args.feature_layer,
                                                   load_image_list=False)

    model_options = {
        'question_vocab_size': len(qa_data['index_to_qw']),
        'residual_channels': args.residual_channels,
        'ans_vocab_size': len(qa_data['index_to_ans']),
        'filter_width': 3,
        'img_dim': 14,
        'img_channels': 2048,
        'dilations': [
            1,
            2,
            4,
            8,
            1,
            2,
            4,
            8,
        ],
        'text_model': args.text_model,
        'dropout_keep_prob': 0.6,
        'max_question_length': qa_data['max_question_length'],
        'num_answers': 10
    }

    print "MODEL OPTIONS"
    print model_options

    model = VQA_model_attention.VQA_model(model_options)
    model.build_model()
    train_op = tf.train.AdamOptimizer(args.learning_rate).minimize(model.loss)
    model.build_generator(reuse=True)

    sess = tf.InteractiveSession()
    tf.initialize_all_variables().run()

    saver = tf.train.Saver()
    if args.resume_model:
        saver.restore(sess, args.resume_model)

    step = 0
    training_log = []

    for epoch in xrange(args.epochs):
        batch_no = 0
        while (batch_no * args.batch_size) < len(qa_data['training']):
            start = time.clock()
            question, answer, image_features, image_ids, _ = get_batch(
                batch_no, args.batch_size, qa_data['training'], conv_features,
                image_id_map, 'train', model_options)

            _, loss_value = sess.run(
                [train_op, model.loss],
                feed_dict={
                    model.question: question,
                    model.image_features: image_features,
                    model.answers: answer
                })
            end = time.clock()
            print "Time for batch of photos", end - start
            print "Time for one epoch (mins)", len(
                qa_data['training']) / args.batch_size * (end - start) / 60.0
            batch_no += 1
            step += 1

            print "LOSS", loss_value, batch_no, len(
                qa_data) / args.batch_size, step, epoch
            print "****"
            if step % args.sample_every == 0:
                try:
                    shutil.rmtree('Data/samples')
                except:
                    pass

                os.makedirs('Data/samples')

                pred_answer, prob1, prob2 = sess.run(
                    [model.g_predictions, model.g_prob1, model.g_prob2],
                    feed_dict={
                        model.g_question: question,
                        model.g_image_features: image_features
                    })
                pred_ans_text = utils.answer_indices_to_text(
                    pred_answer, ans_vocab_rev)
                # just a sample
                actual_ans_text = utils.answer_indices_to_text(
                    answer[:, 0], ans_vocab_rev)
                sample_data = []
                print "Actual vs Prediction"
                for sample_i in range(len(pred_ans_text)):
                    print actual_ans_text[sample_i], pred_ans_text[sample_i]
                    question_text = utils.question_indices_to_text(
                        question[sample_i], ques_vocab_rev)
                    image_array = utils.image_array_from_image_id(
                        image_ids[sample_i], 'train')
                    blend1 = utils.get_blend_map(image_array,
                                                 prob1[sample_i],
                                                 overlap=True)
                    blend2 = utils.get_blend_map(image_array,
                                                 prob2[sample_i],
                                                 overlap=True)
                    sample_data.append({
                        'question': question_text,
                        'actual_answer': actual_ans_text[sample_i],
                        'predicted_answer': pred_ans_text[sample_i],
                        'image_id': image_ids[sample_i],
                        'batch_index': sample_i
                    })
                    misc.imsave(
                        'Data/samples/{}_actual_image.jpg'.format(sample_i),
                        image_array)
                    misc.imsave('Data/samples/{}_blend1.jpg'.format(sample_i),
                                blend1)
                    misc.imsave('Data/samples/{}_blend2.jpg'.format(sample_i),
                                blend2)

                f = open('Data/samples/sample.json', 'wb')
                f.write(json.dumps(sample_data))
                f.close()
                shutil.make_archive('Data/samples', 'zip', 'Data/samples')
                gc.collect()

            if step % args.evaluate_every == 0:
                accuracy = evaluate_model(model, qa_data, args, model_options,
                                          sess, conv_features_val,
                                          image_id_map_val)
                print "ACCURACY>> ", accuracy, step, epoch
                training_log.append({
                    'step': step,
                    'epoch': epoch,
                    'accuracy': accuracy,
                })
                f = open(args.training_log_file, 'wb')
                f.write(json.dumps(training_log))
                f.close()

                save_path = saver.save(
                    sess,
                    "Data/Models{}/model{}.ckpt".format(args.version, epoch))
                gc.collect()
                # to avoid h5py from slowing down.
                conv_features = data_loader.load_conv_features(
                    'train',
                    args.cnn_model,
                    args.feature_layer,
                    load_image_list=False)

            if step >= args.max_steps:
                break
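
The log written to args.training_log_file above is a JSON list of records with 'step', 'epoch' and 'accuracy' keys, so it can be inspected after a run with a few lines, for example (assuming the default path):

import json

with open('Data/training_log.json') as f:
    training_log = json.load(f)
for entry in training_log:
    print("step %d (epoch %d): accuracy %.4f" % (entry['step'], entry['epoch'], entry['accuracy']))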
Example no. 15
def main():
	parser = argparse.ArgumentParser()  # argparse is Python's built-in module for parsing command-line options and arguments;
					    # declare the parameters the program needs and argparse parses them from sys.argv, generating help and usage text automatically
	parser.add_argument('--num_lstm_layers', type=int, default=2,
                       help='num_lstm_layers')
	parser.add_argument('--fc7_feature_length', type=int, default=4096,
                       help='fc7_feature_length')
	parser.add_argument('--rnn_size', type=int, default=512,
                       help='rnn_size')
	parser.add_argument('--embedding_size', type=int, default=512,
                       help='embedding_size')
	parser.add_argument('--word_emb_dropout', type=float, default=0.5,
                       help='word_emb_dropout')
	parser.add_argument('--image_dropout', type=float, default=0.5,
                       help='image_dropout')
	parser.add_argument('--data_dir', type=str, default='Data',
                       help='Data directory')
	parser.add_argument('--batch_size', type=int, default=200,
                       help='Batch Size')
	parser.add_argument('--learning_rate', type=float, default=0.001,
                       help='Learning Rate')
	parser.add_argument('--epochs', type=int, default=200,
                       help='Epochs')
	parser.add_argument('--debug', type=bool, default=False,
                       help='Debug')
	parser.add_argument('--resume_model', type=str, default=None,
                       help='Trained Model Path')
	parser.add_argument('--version', type=int, default=2,
                       help='VQA data version')

	args = parser.parse_args()  # parse the command-line arguments defined above
	print("Reading QA DATA")
	# What kind of data is stored here? i.e. what structure does load_questions_answers return, and what does qa_data look like?
	qa_data = data_loader.load_questions_answers(args.version, args.data_dir)
	
	print("Reading fc7 features")
	
	# The data_loader below returns precomputed features and ids; the loader itself does no training, so how were they obtained?
	# What does the image_id_list returned by data_loader look like?
	fc7_features, image_id_list = data_loader.load_fc7_features(args.data_dir, 'train')
	print("FC7 features", fc7_features.shape)
	print("image_id_list", image_id_list.shape)

	image_id_map = {}  # maps each image_id to its numeric index; stored as a dict
	for i in range(len(image_id_list)):
		image_id_map[ image_id_list[i] ] = i
	
	# Why is a dictionary like ans_map needed?
	# What is ans here, and why does qa_data['answer_vocab'][ans] have this structure?
	ans_map = { qa_data['answer_vocab'][ans] : ans for ans in qa_data['answer_vocab']}
	
	# The following sets up the parameters used to configure and initialize the model.
	model_options = {
		'num_lstm_layers' : args.num_lstm_layers,
		'rnn_size' : args.rnn_size,
		'embedding_size' : args.embedding_size,
		'word_emb_dropout' : args.word_emb_dropout,
		'image_dropout' : args.image_dropout,
		'fc7_feature_length' : args.fc7_feature_length,
		'lstm_steps' : qa_data['max_question_length'] + 1,
		'q_vocab_size' : len(qa_data['question_vocab']),
		'ans_vocab_size' : len(qa_data['answer_vocab'])
	}
	
	
	# The next few lines initialize and invoke the TensorFlow model.
	model = vis_lstm_model.Vis_lstm_model(model_options)  # instantiate the model
	input_tensors, t_loss, t_accuracy, t_p = model.build_model()  # build the LSTM model graph and fetch its tensors
	train_op = tf.train.AdamOptimizer(args.learning_rate).minimize(t_loss)  # minimize the loss with the Adam optimizer
	sess = tf.InteractiveSession()  # create an interactive TensorFlow session
	tf.initialize_all_variables().run()

	# Checkpoint handling: if --resume_model is given, restore the previously saved training state.
	saver = tf.train.Saver()
	if args.resume_model:
		saver.restore(sess, args.resume_model)

	for i in range(args.epochs):
		batch_no = 0

		while (batch_no*args.batch_size) < len(qa_data['training']): # batch_no*args.batch_size = the total number of elements 
			#in training set that has been explored.
			#Get the batch of the training set.
			sentence, answer, fc7 = get_training_batch(batch_no, args.batch_size, fc7_features, image_id_map, qa_data, 'train')
			_, loss_value, accuracy, pred = sess.run([train_op, t_loss, t_accuracy, t_p], 
				feed_dict={
					input_tensors['fc7']:fc7,
					input_tensors['sentence']:sentence,
					input_tensors['answer']:answer
				}
			)  # run one training step and fetch the loss, accuracy and predictions
			batch_no += 1
			if args.debug:
				for idx, p in enumerate(pred):
					print(ans_map[p], ans_map[ np.argmax(answer[idx])])

				print("Loss", loss_value, batch_no, i)
				print("Accuracy", accuracy)
				print("---------------")
			else:
				print("Loss", loss_value, batch_no, i)
				print("Training Accuracy", accuracy)
			
		save_path = saver.save(sess, "Data/Models/model{}.ckpt".format(i))
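		# tf.train.Saver keeps only the 5 most recent checkpoints by default, so most
		# of the per-epoch files written above will be deleted again. A sketch if every
		# epoch should be kept: saver = tf.train.Saver(max_to_keep=None)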
Esempio n. 16
0
def main():
	parser = argparse.ArgumentParser()
	parser.add_argument('--num_lstm_layers', type=int, default=2,
                       help='num_lstm_layers')
	parser.add_argument('--fc7_feature_length', type=int, default=4096,
                       help='fc7_feature_length')
	parser.add_argument('--rnn_size', type=int, default=512,
                       help='rnn_size')
	parser.add_argument('--embedding_size', type=int, default=512,
                       help='embedding_size')
	parser.add_argument('--word_emb_dropout', type=float, default=0.5,
                       help='word_emb_dropout')
	parser.add_argument('--image_dropout', type=float, default=0.5,
                       help='image_dropout')
	parser.add_argument('--data_dir', type=str, default='Data',
                       help='Data directory')
	parser.add_argument('--batch_size', type=int, default=200,
                       help='Batch Size')
	parser.add_argument('--learning_rate', type=float, default=0.001,
                       help='Learning rate')
	parser.add_argument('--epochs', type=int, default=200,
                       help='Epochs')
	parser.add_argument('--debug', type=bool, default=False,
                       help='Debug')
	parser.add_argument('--model_path', type=str, default = 'Data/Models/model21.ckpt',
                       help='Model Path')

	args = parser.parse_args()
	print "Reading QA DATA"
	qa_data = data_loader.load_questions_answers(args)
	
	print "Reading fc7 features"
	fc7_features, image_id_list = data_loader.load_fc7_features(args.data_dir, 'val')
	print "FC7 features", fc7_features.shape
	print "image_id_list", image_id_list.shape

	image_id_map = {}
	for i in xrange(len(image_id_list)):
		image_id_map[ image_id_list[i] ] = i

	ans_map = { qa_data['answer_vocab'][ans] : ans for ans in qa_data['answer_vocab']}

	model_options = {
		'num_lstm_layers' : args.num_lstm_layers,
		'rnn_size' : args.rnn_size,
		'embedding_size' : args.embedding_size,
		'word_emb_dropout' : args.word_emb_dropout,
		'image_dropout' : args.image_dropout,
		'fc7_feature_length' : args.fc7_feature_length,
		'lstm_steps' : qa_data['max_question_length'] + 1,
		'q_vocab_size' : len(qa_data['question_vocab']),
		'ans_vocab_size' : len(qa_data['answer_vocab'])
	}
	
	
	
	model = vis_lstm_model.Vis_lstm_model(model_options)
	input_tensors, t_prediction, t_ans_probab = model.build_generator()
	sess = tf.InteractiveSession()
	saver = tf.train.Saver()

	avg_accuracy = 0.0
	total = 0
	saver.restore(sess, args.model_path)
	
	batch_no = 0
	while (batch_no*args.batch_size) < len(qa_data['validation']):
		sentence, answer, fc7 = get_batch(batch_no, args.batch_size, 
			fc7_features, image_id_map, qa_data, 'val')
		
		pred, ans_prob = sess.run([t_prediction, t_ans_probab], feed_dict={
            input_tensors['fc7']:fc7,
            input_tensors['sentence']:sentence,
        })
		
		batch_no += 1
		if args.debug:
			for idx, p in enumerate(pred):
				print ans_map[p], ans_map[ np.argmax(answer[idx])]

		correct_predictions = np.equal(pred, np.argmax(answer, 1))
		correct_predictions = correct_predictions.astype('float32')
		accuracy = correct_predictions.mean()
		print "Acc", accuracy
		avg_accuracy += accuracy
		total += 1
	
	print "Acc", avg_accuracy/total
Esempio n. 17
0
def main():
    print "Total Images"
    parser = argparse.ArgumentParser()
    parser.add_argument('--split', type=str, default='train',
                        help='train/val')
    parser.add_argument('--data_dir', type=str, default='Data',
                        help='Data directory')
    parser.add_argument('--batch_size', type=int, default=10,
                        help='Batch Size')

    print "Total Images"
    args = parser.parse_args()
    print "Total Images"
    data_loader.prepare_training_data(version=1)
    all_data = data_loader.load_questions_answers(version=1)
    if args.split == "train":
        qa_data = all_data['training']
    else:
        qa_data = all_data['validation']

    image_ids = {}
    for qa in qa_data:
        image_ids[qa['image_id']] = 1

    image_id_list = [img_id for img_id in image_ids]
    print "Total Images", len(image_id_list)

    # Build the fc7 feature extractor from Keras' VGG16: the second fully-connected
    # layer ('fc2') gives the 4096-d activations stored below (assumes
    # keras.models.Model and keras.applications.vgg16.VGG16 are imported).
    base_model = VGG16(weights='imagenet', include_top=True)
    model = Model(inputs=base_model.input, outputs=base_model.get_layer('fc2').output)

    fc7 = np.ndarray((len(image_id_list), 4096))
    idx = 0

    while idx < len(image_id_list):
        start = time.clock()
        image_batch = np.ndarray((args.batch_size, 224, 224, 3))
        fc7_batch = np.ndarray((args.batch_size, 4096))  # per-batch fc7 feature buffer

        count = 0
        for i in range(0, args.batch_size):
            if idx >= len(image_id_list):
                break
            image_file = join(args.data_dir,
                              '%s2014/COCO_%s2014_%.12d.jpg' % (args.split, args.split, image_id_list[idx]))

            image_batch[i, :, :, :] = utils.load_image_array(image_file)

            x = np.expand_dims(image_batch[i, :, :, :], axis=0)
            x = preprocess_input(x)

            features = model.predict(x)
            fc7_batch[i, :] = features


            idx += 1
            count += 1

        fc7[(idx - count):idx, :] = fc7_batch[0:count, :]
        end = time.clock()
        print "Time for batch 10 photos", end - start
        print "Hours For Whole Dataset", (len(image_id_list) * 1.0) * (end - start) / 60.0 / 60.0 / 10.0

        print "Images Processed", idx

    print "Saving fc7 features"
    h5f_fc7 = h5py.File(join(args.data_dir, args.split + '_fc7.h5'), 'w')
    h5f_fc7.create_dataset('fc7_features', data=fc7)
    h5f_fc7.close()

    print "Saving image id list"
    h5f_image_id_list = h5py.File(join(args.data_dir, args.split + '_image_id_list.h5'), 'w')
    h5f_image_id_list.create_dataset('image_id_list', data=image_id_list)
    h5f_image_id_list.close()
    print "Done!"
Esempio n. 18
0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--split', type=str, default='train', help='train/val')
    parser.add_argument('--model_path',
                        type=str,
                        default='Data/vgg16.tfmodel',
                        help='Pretrained VGG16 Model')
    parser.add_argument('--data_dir',
                        type=str,
                        default='Data',
                        help='Data directory')
    parser.add_argument('--batch_size',
                        type=int,
                        default=10,
                        help='Batch Size')

    args = parser.parse_args()
    #print(args.model_path)
    vgg_file = open(args.model_path, 'rb')
    vgg16raw = vgg_file.read()
    vgg_file.close()

    graph_def = tf.GraphDef()
    graph_def.ParseFromString(vgg16raw)

    images = tf.placeholder("float", [None, 224, 224, 3])
    tf.import_graph_def(graph_def, input_map={"images": images})

    graph = tf.get_default_graph()

    for opn in graph.get_operations():
        print("Name", opn.name, list(opn.values()))

    #Loading data


# 	data_loader.prepare_training_data(version = 2, data_dir = 'Data')
    all_data = data_loader.load_questions_answers()
    print(args)
    if args.split == "train":
        qa_data = all_data['training']
    else:
        qa_data = all_data['validation']

    image_ids = {}
    for qa in qa_data:
        image_ids[qa['image_id']] = 1

    image_id_list = [img_id for img_id in image_ids]
    print("Total Images", len(image_id_list))

    sess = tf.Session()
    fc7 = np.ndarray((len(image_id_list), 4096))
    idx = 0

    while idx < len(image_id_list):
        start = time.clock()
        image_batch = np.ndarray((args.batch_size, 224, 224, 3))

        count = 0
        for i in range(0, args.batch_size):
            if idx >= len(image_id_list):
                break
            image_file = join(
                args.data_dir, '%s2015/abstract_v002_%s2015_%.12d.png' %
                (args.split, args.split, image_id_list[idx]))
            image_batch[i, :, :, :] = utils.load_image_array(
                image_file)[:, :, :3]
            idx += 1
            count += 1

        feed_dict = {images: image_batch[0:count, :, :, :]}
        fc7_tensor = graph.get_tensor_by_name("import/Relu_1:0")
        fc7_batch = sess.run(fc7_tensor, feed_dict=feed_dict)
        fc7[(idx - count):idx, :] = fc7_batch[0:count, :]
        end = time.clock()
        print("Time for batch 10 photos", end - start)
        print("Hours For Whole Dataset",
              (len(image_id_list) * 1.0) * (end - start) / 60.0 / 60.0 / 10.0)

        print("Images Processed", idx)

    print("Saving fc7 features")
    h5f_fc7 = h5py.File(join(args.data_dir, args.split + '_fc7.h5'), 'w')
    h5f_fc7.create_dataset('fc7_features', data=fc7)
    h5f_fc7.close()

    print("Saving image id list")
    h5f_image_id_list = h5py.File(
        join(args.data_dir, args.split + '_image_id_list.h5'), 'w')
    h5f_image_id_list.create_dataset('image_id_list', data=image_id_list)
    h5f_image_id_list.close()
    print("Done!")
Esempio n. 19
0
def trainNetwork(sess, net, num_epochs, C, saver_all):
    # -*- coding: utf-8 -*-
    # Get handle for vgg model
    vgg, images = data_loader.getVGGhandle()

    # Parse all the vqa question informations
    qa_data = data_loader.load_questions_answers(C.datapath)
    data_validation = qa_data['validation']
    data_training = qa_data['training']
    question_vocab = qa_data['question_vocab']
    answer_vocab = qa_data['answer_vocab']

    question_input_dim = len(question_vocab)
    answer_out_dim = len(answer_vocab)

    num_training_data = len(data_training)
    nIter = num_training_data // net.batchSize

    # Prepare data generator which will be used for training the network
    train_data_generator = data_loader.getNextBatch(sess,
                                                    vgg,
                                                    images,
                                                    data_training,
                                                    question_vocab,
                                                    answer_vocab,
                                                    os.path.join(
                                                        C.image_base_path,
                                                        'train2014'),
                                                    batchSize=C.batchSize,
                                                    purpose='train')
    valid_data_generator = data_loader.getNextBatch(sess,
                                                    vgg,
                                                    images,
                                                    data_validation,
                                                    question_vocab,
                                                    answer_vocab,
                                                    os.path.join(
                                                        C.image_base_path,
                                                        'val2014'),
                                                    batchSize=C.batchSize,
                                                    purpose='val')

    # global_step = tf.Variable(0, dtype=tf.int32, trainable=False, name='global_step')

    # Generate data in batches:
    # batch_question : [batchSize = 32, maxQuestionLength=22, questionVocabDim = 15xxx]
    # batch_answer   : [batchSize = 32, answer_vocab = 1000]
    # batch_image_id : [batchSize = 32, 'filename of all the images in the batch' -> ['487025', '487025', '78077' ...... ] ]
    # batch_features : [batchSize = 32, cnnHeight=14, cnnWidth=14, featureDim = 512]
    # batch_question,batch_answer,batch_image_id,batch_features = train_data_generator.next()

    batch_question, batch_answer, batch_image_id, batch_features = train_data_generator.next(
    )
    prev_loss = sess.run(net.cross_entropy, feed_dict = {   net.qs_ip  : batch_question ,	\
                   net.ans_ip : batch_answer  , 	\
                   net.cnn_ip : batch_features })
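    # Quick sanity check on the first batch (a sketch; only the fixed dimensions
    # documented above are asserted, since the vocabulary sizes depend on the data):
    #   assert batch_question.shape[0] == C.batchSize
    #   assert batch_features.shape[1:] == (14, 14, 512)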
    # global_step = tf.Variable(0, dtype=tf.int32, trainable=False, name='global_step')
    # sess.run(tf.initialize_variables([global_step]))
    batchCount = -1
    log_filename = './log_dir/train_' + datetime.now().strftime(
        "%Y%m%d-%H%M%S") + '.log'
    fHandle = open(log_filename, 'w')
    print("Writing log to file: ", log_filename)

    print("Training network\n")
    print("Initial Loss: ", prev_loss)
    print "Number of epochs:%d , \t Iteration per epoch:%d" % (num_epochs,
                                                               nIter)
    fHandle.write("Training Network\n")

    fHandle.write("Initial Loss: \n" % (prev_loss))

    start_time = time.time()

    for epoch in range(num_epochs):
        for iter in range(nIter):
            batchCount += 1
            batch_question, batch_answer, batch_image_id, batch_features = train_data_generator.next(
            )

            if (batchCount % 1 == 0):  # log every batch; raise the modulus to log less frequently
                run_options = tf.RunOptions(
                    trace_level=tf.RunOptions.FULL_TRACE)
                run_metadata = tf.RunMetadata()

                [curr_train_loss, curr_train_acc , train_summary, true_answer, predicted_answer] = sess.run([net.cross_entropy, net.accuracy ,net.summary_op,
                                      net.true_answer, net.predicted_answer  ] , 			\
                                      feed_dict = {  net.qs_ip  : batch_question ,				\
                                      net.ans_ip : batch_answer  , 				\
                                      net.cnn_ip : batch_features },
                                      options=run_options,
                                      run_metadata=run_metadata)

                print("True labels")
                print true_answer
                print("Predicted labels")
                print predicted_answer

                net.writer.add_run_metadata(run_metadata,
                                            'step%03d' % batchCount)
                net.writer.add_summary(train_summary)

                valid_batch_question, valid_batch_answer, valid_batch_image_id, valid_batch_features = valid_data_generator.next(
                )
                [curr_valid_loss, curr_valid_acc, valid_summary ] = sess.run([net.cross_entropy, net.accuracy ,net.summary_op] ,
                                feed_dict = { net.qs_ip  : valid_batch_question ,   		\
                                    net.ans_ip : valid_batch_answer   , 		\
                                    net.cnn_ip : valid_batch_features } )

                if (curr_train_loss < prev_loss):
                    print("Loss decreased from %.4f to %.4f" %
                          (prev_loss, curr_train_loss))
                    print("Saving session")
                    fHandle.write("Loss decreased from %.4f to %.4f" %
                                  (prev_loss, curr_train_loss))
                    saver_all.save(sess,
                                   'checkpoints/vqa',
                                   global_step=net.global_step)
                    prev_loss = curr_train_loss
                print "Epoc:%d/%d_Iter:%d/%d,  TrainLoss:%.2f  TrainAccuracy:%.2f,  ValidLoss:%.2f  ValidAccuracy:%.2f  Elapsed time: %d" % (
                    epoch, num_epochs, iter, nIter, curr_train_loss,
                    curr_train_acc * 100, curr_valid_loss,
                    curr_valid_acc * 100, time.time() - start_time)
                fHandle.write(
                    "Epoc:%d/%d_Iter:%d/%d \t, TrainLoss: %.2f \t TrainAccuracy: %.2f \t, ValidLoss:%.2f \t ValidAccuracy:%.2f \t Elapsed time: %d\n"
                    % (epoch, num_epochs, iter, nIter, curr_train_loss,
                       curr_train_acc * 100, curr_valid_loss,
                       curr_valid_acc * 100, time.time() - start_time))
                start_time = time.time()
            # train the batch
            sess.run(net.train_step,
                     feed_dict={
                         net.qs_ip: batch_question,
                         net.ans_ip: batch_answer,
                         net.cnn_ip: batch_features
                     })
            # net.print_variables()
    net.writer.close()
    fHandle.close()
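# Hypothetical driver for trainNetwork (a sketch; the network class name, its
# constructor and C.num_epochs are assumptions defined elsewhere in this repository):
#   with tf.Session() as sess:
#       net = AttentionVQANet(C)                      # assumed constructor
#       saver_all = tf.train.Saver()
#       sess.run(tf.global_variables_initializer())
#       trainNetwork(sess, net, C.num_epochs, C, saver_all)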