Code Example #1
def main():

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--model',
        type=str,
        default=
        '../models/lstm_1_num_hidden_units_lstm_512_num_hidden_units_mlp_1024_num_hidden_layers_mlp_3.json'
    )
    parser.add_argument(
        '--weights',
        type=str,
        default=
        '../models/lstm_1_num_hidden_units_lstm_512_num_hidden_units_mlp_1024_num_hidden_layers_mlp_3_epoch_070.hdf5'
    )
    parser.add_argument('--sample_size', type=int, default=25)
    parser.add_argument('--caffe', help='path to caffe installation')
    parser.add_argument('--model_def',
                        help='path to model definition prototxt')
    parser.add_argument('--vggmodel',
                        default='VGG_ILSVRC_16_layers.caffemodel',
                        help='path to model parameters')
    args = parser.parse_args()
    print 'Loading Word2vec'
    nlp = English()
    print 'Loaded word2vec features'
    labelencoder = joblib.load('../models/labelencoder.pkl')
    print 'Loading Model'
    model = model_from_json(open(args.model).read())
    print 'Loading Weights'
    model.load_weights(args.weights)
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    print 'Loaded'
    while True:

        path = str(raw_input('Enter path to image : '))
        if path != 'same':  # 'same' reuses the features extracted on a previous pass
            base_dir = os.path.dirname(path)
            os.system('python extract_features.py --caffe ' + str(args.caffe) +
                      ' --model_def vgg_features.prototxt --gpu --model ' +
                      str(args.vggmodel) + ' --image ' + path)
        print 'Loading VGGfeats'
        vgg_model_path = os.path.join(base_dir, 'vgg_feats.mat')
        features_struct = scipy.io.loadmat(vgg_model_path)
        VGGfeatures = features_struct['feats']
        print "Loaded"

        question = unicode(raw_input("Ask a question: "))
        if question == "quit":
            q = False
        timesteps = len(nlp(question))
        X_q = get_questions_tensor_timeseries([question], nlp, timesteps)
        X_i = np.reshape(VGGfeatures, (1, 4096))

        X = [X_q, X_i]

        y_predict = model.predict_classes(X, verbose=0)
        print labelencoder.inverse_transform(y_predict)
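Every example on this page builds the question input with get_questions_tensor_timeseries(questions, nlp, timesteps). Judging only from how it is called here (300-d word vectors, LSTM input_shape=(max_len, word_vec_dim)), a minimal sketch of what that helper is assumed to compute is shown below; the real implementation lives in the visual-qa repo and may differ in detail.

import numpy as np

def questions_tensor_sketch(questions, nlp, timesteps, word_vec_dim=300):
    # One row per question, one timestep per token, one 300-d word vector
    # per timestep; shorter questions stay zero-padded.
    X = np.zeros((len(questions), timesteps, word_vec_dim))
    for i, question in enumerate(questions):
        for j, token in enumerate(nlp(question)):
            if j == timesteps:
                break
            X[i, j, :] = token.vector  # assumes spaCy tokens expose .vector
    return X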
Code Example #2
File: demo_batch.py Project: poyuwu/visual-qa
def main():
    """
	Before runnning this demo ensure that you have some images from the MS COCO validation set
	saved somewhere, and update the image_dir variable accordingly
	Also, this demo is designed to run with the models released with the visual-qa repo, if you
	would like to get use it with some other model (say an MLP based model or a langauge-only model)
	you will have to make some changes.
	"""
    image_dir = "../../vqa_images/"
    local_images = [f for f in listdir(image_dir) if isfile(join(image_dir, f))]

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-model",
        type=str,
        default="../models/lstm_1_num_hidden_units_lstm_512_num_hidden_units_mlp_1024_num_hidden_layers_mlp_3.json",
    )
    parser.add_argument(
        "-weights",
        type=str,
        default="../models/lstm_1_num_hidden_units_lstm_512_num_hidden_units_mlp_1024_num_hidden_layers_mlp_3_epoch_070.hdf5",
    )
    parser.add_argument("-sample_size", type=int, default=25)
    args = parser.parse_args()

    model = model_from_json(open(args.model).read())
    model.load_weights(args.weights)
    model.compile(loss="categorical_crossentropy", optimizer="rmsprop")
    print "Model loaded and compiled"
    images_val = open("../data/preprocessed/images_val2014.txt", "r").read().decode("utf8").splitlines()

    nlp = English()
    print "Loaded word2vec features"
    labelencoder = joblib.load("../models/labelencoder.pkl")

    vgg_model_path = "../features/coco/vgg_feats.mat"
    features_struct = scipy.io.loadmat(vgg_model_path)
    VGGfeatures = features_struct["feats"]
    print "Loaded vgg features"
    image_ids = open("../features/coco_vgg_IDMap.txt").read().splitlines()
    img_map = {}
    for ids in image_ids:
        id_split = ids.split()
        img_map[id_split[0]] = int(id_split[1])

    image_sample = random.sample(local_images, args.sample_size)

    for image in image_sample:
        p = subprocess.Popen(["display", image_dir + image])
        q = unicode(raw_input("Ask a question about the image:"))
        coco_id = str(int(image[-16:-4]))
        timesteps = len(nlp(q))  # use the question's own length as the number of timesteps
        X_q = get_questions_tensor_timeseries([q], nlp, timesteps)
        X_i = get_images_matrix([coco_id], img_map, VGGfeatures)
        X = [X_q, X_i]
        y_predict = model.predict_classes(X, verbose=0)
        print labelencoder.inverse_transform(y_predict)
        raw_input("Press enter to continue...")
        p.kill()
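The coco_id = str(int(image[-16:-4])) line relies on the MS COCO file-naming convention: a zero-padded 12-digit image id sits right before the extension. A quick illustration:

# MS COCO file names look like COCO_val2014_000000262148.jpg;
# [-16:-4] slices the 12-digit id and int() drops the zero padding.
fname = 'COCO_val2014_000000262148.jpg'
print str(int(fname[-16:-4]))  # prints '262148'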
Code Example #3
File: own_image.py Project: BenJamesbabala/visual-qa
def main():

   parser = argparse.ArgumentParser()
   parser.add_argument('--model', type=str, default='../models/lstm_1_num_hidden_units_lstm_512_num_hidden_units_mlp_1024_num_hidden_layers_mlp_3.json')
   parser.add_argument('--weights', type=str, default='../models/lstm_1_num_hidden_units_lstm_512_num_hidden_units_mlp_1024_num_hidden_layers_mlp_3_epoch_070.hdf5')
   parser.add_argument('--sample_size', type=int, default=25)
   parser.add_argument('--caffe', help='path to caffe installation')
   parser.add_argument('--model_def', help='path to model definition prototxt')
   parser.add_argument('--vggmodel', default='VGG_ILSVRC_16_layers.caffemodel', help='path to model parameters')
   args = parser.parse_args()
   print 'Loading Word2vec'
   nlp = English()
   print 'Loaded word2vec features'
   labelencoder = joblib.load('../models/labelencoder.pkl')
   print 'Loading Model'
   model = model_from_json(open(args.model).read())
   print 'Loading Weights'
   model.load_weights(args.weights)
   model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
   print 'Loaded'
   while True:

       path = str(raw_input('Enter path to image : '))
       if path != 'same':  # 'same' reuses the features extracted on a previous pass
           base_dir = os.path.dirname(path)
           os.system('python extract_features.py --caffe ' + str(args.caffe) + ' --model_def vgg_features.prototxt --gpu --model ' + str(args.vggmodel) + ' --image ' + path )
       print 'Loading VGGfeats'
       vgg_model_path = os.path.join(base_dir, 'vgg_feats.mat')
       features_struct = scipy.io.loadmat(vgg_model_path)
       VGGfeatures = features_struct['feats']
       print "Loaded"

       question = unicode(raw_input("Ask a question: "))
       if question == "quit":
           q = False
       timesteps = len(nlp(question))
       X_q = get_questions_tensor_timeseries([question], nlp, timesteps)
       X_i = np.reshape(VGGfeatures, (1, 4096))

       X = [X_q, X_i]

       y_predict = model.predict_classes(X, verbose=0)
       print labelencoder.inverse_transform(y_predict)
Code Example #4
File: own_image.py Project: DESHRAJ/visual-qa
def predict():
    path = str(raw_input('Enter path to image : '))
    question = unicode(raw_input("Ask a question: "))
    print(question, path)
    # 'same' skips re-extraction to reuse a previous image's features; note that
    # base_dir is then left undefined in this standalone function.
    if path != 'same':
        base_dir = os.path.dirname(path)
        os.system('python extract_features.py --caffe ' + str(CAFFE_PATH) + ' --model_def vgg_features.prototxt --gpu --model ' + str(CAFFE_MODEL_PATH) + ' --image ' + path)

    print 'Loading VGGfeats'
    vgg_model_path = os.path.join(base_dir, 'vgg_feats.mat')
    features_struct = scipy.io.loadmat(vgg_model_path)
    VGGfeatures = features_struct['feats']
    print "Loaded"

    timesteps = len(nlp(question))
    X_q = get_questions_tensor_timeseries([question], nlp, timesteps)
    X_i = np.reshape(VGGfeatures, (1, 4096))

    X = [X_q, X_i]

    y_predict = model.predict_classes(X, verbose=0)
    ans = labelencoder.inverse_transform(y_predict)
    print(ans)
    return 'OK'
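Building the extract_features.py command line by string concatenation, as above, breaks as soon as a path contains spaces. A sketch of the same call through subprocess, assuming CAFFE_PATH, CAFFE_MODEL_PATH and path are defined as in the example:

import subprocess

# Argument-list form: no shell parsing, so paths with spaces are safe.
subprocess.check_call([
    'python', 'extract_features.py',
    '--caffe', str(CAFFE_PATH),
    '--model_def', 'vgg_features.prototxt',
    '--gpu',
    '--model', str(CAFFE_MODEL_PATH),
    '--image', path,
])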
Code Example #5
def main():
	parser = argparse.ArgumentParser()
	parser.add_argument('-num_hidden_units', type=int, default=512)
	parser.add_argument('-num_lstm_layers', type=int, default=2)
	parser.add_argument('-dropout', type=float, default=0.2)
	parser.add_argument('-activation', type=str, default='tanh')
	parser.add_argument('-num_epochs', type=int, default=100)
	parser.add_argument('-model_save_interval', type=int, default=5)
	parser.add_argument('-batch_size', type=int, default=128)
	parser.add_argument('-word_vector', type=str, default='')
	args = parser.parse_args()

	questions_train = open('../data/preprocessed/questions_train2014.txt', 'r').read().decode('utf8').splitlines()
	questions_lengths_train = open('../data/preprocessed/questions_lengths_train2014.txt', 'r').read().decode('utf8').splitlines()
	answers_train = open('../data/preprocessed/answers_train2014.txt', 'r').read().decode('utf8').splitlines()
	images_train = open('../data/preprocessed/images_train2014.txt', 'r').read().decode('utf8').splitlines()
	max_answers = 1000
	questions_train, answers_train, images_train = selectFrequentAnswers(questions_train,answers_train,images_train, max_answers)

	print 'Loaded questions, sorting by length...'
	questions_lengths_train, questions_train, answers_train = (list(t) for t in zip(*sorted(zip(questions_lengths_train, questions_train, answers_train))))
	
	#encode the remaining answers
	labelencoder = preprocessing.LabelEncoder()
	labelencoder.fit(answers_train)
	nb_classes = len(list(labelencoder.classes_))
	joblib.dump(labelencoder,'../models/labelencoder.pkl')
	max_len = 30 #25 is max for training, 27 is max for validation
	word_vec_dim = 300

	model = Sequential()
	model.add(LSTM(output_dim = args.num_hidden_units, activation='tanh', 
			return_sequences=True, input_shape=(max_len, word_vec_dim)))
	model.add(Dropout(args.dropout))
	model.add(LSTM(args.num_hidden_units, return_sequences=False))
	model.add(Dense(nb_classes, init='uniform'))
	model.add(Activation('softmax'))

	json_string = model.to_json()
	model_file_name = '../models/lstm_language_only_num_hidden_units_' + str(args.num_hidden_units) + '_num_lstm_layers_' + str(args.num_lstm_layers) + '_dropout_' + str(args.dropout)
	open(model_file_name  + '.json', 'w').write(json_string)
	
	print 'Compiling model...'
	model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
	print 'Compilation done...'

	#set up word vectors
	# Code to choose the word vectors; default is Goldberg, but GloVe is preferred
	if args.word_vector == 'glove':
		nlp = spacy.load('en', vectors='en_glove_cc_300_1m_vectors')
	else:
		nlp = English()

	print 'loaded ' + args.word_vector + ' word2vec features...'

	## training
	# num_epochs, batch_size and model_save_interval are configured via the argument parser above
	print 'Training started...'
	for k in xrange(args.num_epochs):

		progbar = generic_utils.Progbar(len(questions_train))

		for qu_batch,an_batch,im_batch in zip(grouper(questions_train, args.batch_size, fillvalue=questions_train[0]), 
												grouper(answers_train, args.batch_size, fillvalue=answers_train[0]), 
												grouper(images_train, args.batch_size, fillvalue=images_train[0])):
			timesteps = len(nlp(qu_batch[-1])) #questions are sorted by length (ascending), so the last one in each batch is the longest
			X_q_batch = get_questions_tensor_timeseries(qu_batch, nlp, timesteps)
			Y_batch = get_answers_matrix(an_batch, labelencoder)
			loss = model.train_on_batch(X_q_batch, Y_batch)
			# fix for the Keras v0.3 issue #9
			progbar.add(args.batch_size, values=[("train loss", loss[0])])

		
		if k%args.model_save_interval == 0:
			model.save_weights(model_file_name + '_epoch_{:02d}.hdf5'.format(k))

	model.save_weights(model_file_name + '_epoch_{:02d}.hdf5'.format(k+1))
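The training loops batch the sorted question/answer/image lists with grouper(iterable, n, fillvalue=...). The repo presumably defines this as the standard itertools "grouper" recipe; a sketch under that assumption:

from itertools import izip_longest  # zip_longest on Python 3

def grouper(iterable, n, fillvalue=None):
    # Collect data into fixed-length chunks, padding the last chunk with
    # fillvalue, e.g. grouper('ABCDEFG', 3, 'x') --> ABC DEF Gxx.
    args = [iter(iterable)] * n
    return izip_longest(fillvalue=fillvalue, *args)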
Code Example #6
File: evaluateLSTM.py Project: Goddard/visual-qa
def main():

	parser = argparse.ArgumentParser()
	parser.add_argument('-model', type=str, required=True)
	parser.add_argument('-weights', type=str, required=True)
	parser.add_argument('-results', type=str, required=True)
	args = parser.parse_args()

	model = model_from_json(open(args.model).read())
	model.load_weights(args.weights)
	model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

	questions_val = open('../data/preprocessed/questions_val2014.txt', 
						'r').read().decode('utf8').splitlines()
	questions_lengths_val = open('../data/preprocessed/questions_lengths_val2014.txt', 
								'r').read().decode('utf8').splitlines()
	answers_val = open('../data/preprocessed/answers_val2014.txt', 
						'r').read().decode('utf8').splitlines()
	images_val = open('../data/preprocessed/images_val2014.txt', 
						'r').read().decode('utf8').splitlines()
	vgg_model_path = '../features/coco/vgg_feats.mat'
	
	questions_lengths_val, questions_val, answers_val, images_val = (list(t) for t in zip(*sorted(zip(questions_lengths_val, questions_val, answers_val, images_val))))

	print 'Model compiled, weights loaded'
	labelencoder = joblib.load('../models/labelencoder.pkl')

	features_struct = scipy.io.loadmat(vgg_model_path)
	VGGfeatures = features_struct['feats']
	print 'Loaded vgg features'
	image_ids = open('../features/coco_vgg_IDMap.txt').read().splitlines()
	img_map = {}
	for ids in image_ids:
		id_split = ids.split()
		img_map[id_split[0]] = int(id_split[1])

	nlp = English()
	print 'Loaded word2vec features'

	nb_classes = 1000
	y_predict_text = []
	batchSize = 128
	widgets = ['Evaluating ', Percentage(), ' ', Bar(marker='#',left='[',right=']'),
           ' ', ETA()]
	pbar = ProgressBar(widgets=widgets)

	for qu_batch,an_batch,im_batch in pbar(zip(grouper(questions_val, batchSize, fillvalue=questions_val[0]), 
												grouper(answers_val, batchSize, fillvalue=answers_val[0]), 
												grouper(images_val, batchSize, fillvalue=images_val[0]))):
		timesteps = len(nlp(qu_batch[-1])) #questions are sorted by length (ascending), so the last one in each batch is the longest
		X_q_batch = get_questions_tensor_timeseries(qu_batch, nlp, timesteps)
		if 'language_only' in args.model:
			X_batch = X_q_batch
		else:
			X_i_batch = get_images_matrix(im_batch, img_map, VGGfeatures)
			X_batch = [X_q_batch, X_i_batch]
		y_predict = model.predict_classes(X_batch, verbose=0)
		y_predict_text.extend(labelencoder.inverse_transform(y_predict))

	incorrect_val=0
	correct_val=0
	f1 = open(args.results, 'w')
	for prediction, truth, question, image in zip(y_predict_text, answers_val, questions_val, images_val):
		temp_count=0
		for _truth in truth.split(';'):
			if prediction == _truth:
				temp_count+=1

		if temp_count>2:
			correct_val+=1
		else:
			incorrect_val+=1

		f1.write(question.encode('utf-8'))
		f1.write('\n')
		f1.write(image.encode('utf-8'))
		f1.write('\n')
		f1.write(prediction)
		f1.write('\n')
		f1.write(truth.encode('utf-8'))
		f1.write('\n')
		f1.write('\n')

	f1.write('Final Accuracy is ' + str(float(correct_val)/(incorrect_val+correct_val)))
	f1.close()
	f1 = open('../results/overall_results.txt', 'a')
	f1.write(args.weights + '\n')
	f1.write(str(float(correct_val)/(incorrect_val+correct_val)) + '\n\n')
	f1.close()
	print 'Final Accuracy on the validation set is', float(correct_val)/(incorrect_val+correct_val)
Code Example #7
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-num_hidden_units_mlp', type=int, default=1024)
    parser.add_argument('-num_hidden_units_lstm', type=int, default=512)
    parser.add_argument('-num_hidden_layers_mlp', type=int, default=3)
    parser.add_argument('-num_hidden_layers_lstm', type=int, default=1)
    parser.add_argument('-dropout', type=float, default=0.5)
    parser.add_argument('-activation_mlp', type=str, default='tanh')
    parser.add_argument('-num_epochs', type=int, default=100)
    parser.add_argument('-model_save_interval', type=int, default=5)
    parser.add_argument('-batch_size', type=int, default=128)
    #TODO Feature parser.add_argument('-resume_training', type=str)
    #TODO Feature parser.add_argument('-language_only', type=bool, default= False)
    args = parser.parse_args()

    word_vec_dim = 300
    img_dim = 4096
    max_len = 30
    nb_classes = 1000

    #get the data
    questions_train = open('../data/preprocessed/questions_train2014.txt',
                           'r').read().decode('utf8').splitlines()
    questions_lengths_train = open(
        '../data/preprocessed/questions_lengths_train2014.txt',
        'r').read().decode('utf8').splitlines()
    answers_train = open('../data/preprocessed/answers_train2014.txt',
                         'r').read().decode('utf8').splitlines()
    images_train = open('../data/preprocessed/images_train2014.txt',
                        'r').read().decode('utf8').splitlines()
    vgg_model_path = '../features/coco/vgg_feats.mat'

    max_answers = nb_classes
    questions_train, answers_train, images_train = selectFrequentAnswers(
        questions_train, answers_train, images_train, max_answers)
    questions_lengths_train, questions_train, answers_train, images_train = (
        list(t) for t in zip(*sorted(
            zip(questions_lengths_train, questions_train, answers_train,
                images_train))))

    #encode the remaining answers
    labelencoder = preprocessing.LabelEncoder()
    labelencoder.fit(answers_train)
    nb_classes = len(list(labelencoder.classes_))
    joblib.dump(labelencoder, '../models/labelencoder.pkl')

    image_model = Sequential()
    image_model.add(Reshape(input_shape=(img_dim, ), dims=(img_dim, )))

    language_model = Sequential()
    if args.num_hidden_layers_lstm == 1:
        language_model.add(
            LSTM(output_dim=args.num_hidden_units_lstm,
                 return_sequences=False,
                 input_shape=(max_len, word_vec_dim)))
    else:
        language_model.add(
            LSTM(output_dim=args.num_hidden_units_lstm,
                 return_sequences=True,
                 input_shape=(max_len, word_vec_dim)))
        for i in xrange(args.num_hidden_layers_lstm - 2):
            language_model.add(
                LSTM(output_dim=args.num_hidden_units_lstm,
                     return_sequences=True))
        language_model.add(
            LSTM(output_dim=args.num_hidden_units_lstm,
                 return_sequences=False))

    model = Sequential()
    model.add(
        Merge([language_model, image_model], mode='concat', concat_axis=1))
    for i in xrange(args.num_hidden_layers_mlp):
        model.add(Dense(args.num_hidden_units_mlp, init='uniform'))
        model.add(Activation(args.activation_mlp))
        model.add(Dropout(args.dropout))
    model.add(Dense(nb_classes))
    model.add(Activation('softmax'))

    json_string = model.to_json()
    model_file_name = '../models/lstm_1_num_hidden_units_lstm_' + str(args.num_hidden_units_lstm) + \
         '_num_hidden_units_mlp_' + str(args.num_hidden_units_mlp) + '_num_hidden_layers_mlp_' + \
         str(args.num_hidden_layers_mlp) + '_num_hidden_layers_lstm_' + str(args.num_hidden_layers_lstm)
    open(model_file_name + '.json', 'w').write(json_string)

    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    print 'Compilation done'

    features_struct = scipy.io.loadmat(vgg_model_path)
    VGGfeatures = features_struct['feats']
    print 'loaded vgg features'
    image_ids = open('../features/coco_vgg_IDMap.txt').read().splitlines()
    img_map = {}
    for ids in image_ids:
        id_split = ids.split()
        img_map[id_split[0]] = int(id_split[1])

    nlp = English()
    print 'loaded word2vec features...'
    ## training
    print 'Training started...'
    for k in xrange(args.num_epochs):

        progbar = generic_utils.Progbar(len(questions_train))

        for qu_batch, an_batch, im_batch in zip(
                grouper(questions_train,
                        args.batch_size,
                        fillvalue=questions_train[-1]),
                grouper(answers_train,
                        args.batch_size,
                        fillvalue=answers_train[-1]),
                grouper(images_train,
                        args.batch_size,
                        fillvalue=images_train[-1])):
            timesteps = len(nlp(qu_batch[-1]))  # questions are sorted by length (ascending), so the last one in each batch is the longest
            X_q_batch = get_questions_tensor_timeseries(
                qu_batch, nlp, timesteps)
            X_i_batch = get_images_matrix(im_batch, img_map, VGGfeatures)
            Y_batch = get_answers_matrix(an_batch, labelencoder)
            loss = model.train_on_batch([X_q_batch, X_i_batch], Y_batch)
            progbar.add(args.batch_size, values=[("train loss", loss)])

        if k % args.model_save_interval == 0:
            model.save_weights(model_file_name +
                               '_epoch_{:03d}.hdf5'.format(k))

    model.save_weights(model_file_name + '_epoch_{:03d}.hdf5'.format(k))
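get_images_matrix(im_batch, img_map, VGGfeatures) pairs each COCO id with its column of the VGG feature matrix. Inferring from the surrounding code (features_struct['feats'] with img_dim = 4096, and the demos reshaping a single feature vector to (1, 4096)), a sketch of what it is assumed to do:

import numpy as np

def images_matrix_sketch(image_ids, img_map, VGGfeatures):
    # VGGfeatures is assumed to be a (4096, num_images) matrix; img_map
    # maps a COCO id string to its column index.
    img_dim = VGGfeatures.shape[0]
    X = np.zeros((len(image_ids), img_dim))
    for i, image_id in enumerate(image_ids):
        X[i, :] = VGGfeatures[:, img_map[image_id]]
    return X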
Code Example #8
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-num_hidden_units', type=int, default=512)
    parser.add_argument('-num_lstm_layers', type=int, default=2)
    parser.add_argument('-dropout', type=float, default=0.2)
    parser.add_argument('-activation', type=str, default='tanh')
    args = parser.parse_args()

    questions_train = open('../data/preprocessed/questions_train2014.txt',
                           'r').read().decode('utf8').splitlines()
    questions_lengths_train = open(
        '../data/preprocessed/questions_lengths_train2014.txt',
        'r').read().decode('utf8').splitlines()
    answers_train = open('../data/preprocessed/answers_train2014.txt',
                         'r').read().decode('utf8').splitlines()
    images_train = open('../data/preprocessed/images_train2014.txt',
                        'r').read().decode('utf8').splitlines()
    max_answers = 1000
    questions_train, answers_train, images_train = selectFrequentAnswers(
        questions_train, answers_train, images_train, max_answers)

    print 'Loaded questions, sorting by length...'
    questions_lengths_train, questions_train, answers_train = (
        list(t) for t in zip(*sorted(
            zip(questions_lengths_train, questions_train, answers_train))))

    #encode the remaining answers
    labelencoder = preprocessing.LabelEncoder()
    labelencoder.fit(answers_train)
    nb_classes = len(list(labelencoder.classes_))
    joblib.dump(labelencoder, '../models/labelencoder.pkl')
    max_len = 30  #25 is max for training, 27 is max for validation
    word_vec_dim = 300

    model = Sequential()
    model.add(
        LSTM(output_dim=args.num_hidden_units,
             activation='tanh',
             return_sequences=True,
             input_shape=(max_len, word_vec_dim)))
    model.add(Dropout(args.dropout))
    model.add(LSTM(args.num_hidden_units, return_sequences=False))
    model.add(Dense(nb_classes, init='uniform'))
    model.add(Activation('softmax'))

    json_string = model.to_json()
    model_file_name = '../models/lstm_language_only_num_hidden_units_' + str(
        args.num_hidden_units) + '_num_lstm_layers_' + str(
            args.num_lstm_layers) + '_dropout_' + str(args.dropout)
    open(model_file_name + '.json', 'w').write(json_string)

    print 'Compiling model...'
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    print 'Compilation done...'

    #set up word vectors
    nlp = English()
    print 'loaded word2vec features...'

    ## training
    print 'Training started...'
    numEpochs = 100
    model_save_interval = 5
    batchSize = 128
    for k in xrange(numEpochs):

        progbar = generic_utils.Progbar(len(questions_train))

        for qu_batch, an_batch, im_batch in zip(
                grouper(questions_train,
                        batchSize,
                        fillvalue=questions_train[0]),
                grouper(answers_train, batchSize, fillvalue=answers_train[0]),
                grouper(images_train, batchSize, fillvalue=images_train[0])):
            timesteps = len(nlp(qu_batch[-1]))  # questions are sorted by length (ascending), so the last one in each batch is the longest
            X_q_batch = get_questions_tensor_timeseries(
                qu_batch, nlp, timesteps)
            Y_batch = get_answers_matrix(an_batch, labelencoder)
            loss = model.train_on_batch(X_q_batch, Y_batch)
            progbar.add(batchSize, values=[("train loss", loss)])

        if k % model_save_interval == 0:
            model.save_weights(model_file_name +
                               '_epoch_{:02d}.hdf5'.format(k))

    model.save_weights(model_file_name + '_epoch_{:02d}.hdf5'.format(k + 1))
Code Example #9
def main():

    parser = argparse.ArgumentParser()
    parser.add_argument('-model', type=str, required=True)
    parser.add_argument('-weights', type=str, required=True)
    parser.add_argument('-results', type=str, required=True)
    args = parser.parse_args()

    model = model_from_json(open(args.model).read())
    model.load_weights(args.weights)
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

    questions_val = open('../data/preprocessed/questions_val2014.txt',
                         'r').read().decode('utf8').splitlines()
    questions_lengths_val = open(
        '../data/preprocessed/questions_lengths_val2014.txt',
        'r').read().decode('utf8').splitlines()
    answers_val = open('../data/preprocessed/answers_val2014_all.txt',
                       'r').read().decode('utf8').splitlines()
    images_val = open('../data/preprocessed/images_val2014.txt',
                      'r').read().decode('utf8').splitlines()
    vgg_model_path = '../features/coco/vgg_feats.mat'

    questions_lengths_val, questions_val, answers_val, images_val = (
        list(t) for t in zip(*sorted(
            zip(questions_lengths_val, questions_val, answers_val,
                images_val))))

    print 'Model compiled, weights loaded'
    labelencoder = joblib.load('../models/labelencoder.pkl')

    features_struct = scipy.io.loadmat(vgg_model_path)
    VGGfeatures = features_struct['feats']
    print 'Loaded vgg features'
    image_ids = open('../features/coco_vgg_IDMap.txt').read().splitlines()
    img_map = {}
    for ids in image_ids:
        id_split = ids.split()
        img_map[id_split[0]] = int(id_split[1])

    nlp = English()
    print 'Loaded word2vec features'

    nb_classes = 1000
    y_predict_text = []
    batchSize = 128
    widgets = [
        'Evaluating ',
        Percentage(), ' ',
        Bar(marker='#', left='[', right=']'), ' ',
        ETA()
    ]
    pbar = ProgressBar(widgets=widgets)

    for qu_batch, an_batch, im_batch in pbar(
            zip(grouper(questions_val, batchSize, fillvalue=questions_val[0]),
                grouper(answers_val, batchSize, fillvalue=answers_val[0]),
                grouper(images_val, batchSize, fillvalue=images_val[0]))):
        timesteps = len(nlp(qu_batch[-1]))  # questions are sorted by length (ascending), so the last one in each batch is the longest
        X_q_batch = get_questions_tensor_timeseries(qu_batch, nlp, timesteps)
        if 'language_only' in args.model:
            X_batch = X_q_batch
        else:
            X_i_batch = get_images_matrix(im_batch, img_map, VGGfeatures)
            X_batch = [X_q_batch, X_i_batch]
        y_predict = model.predict_classes(X_batch, verbose=0)
        y_predict_text.extend(labelencoder.inverse_transform(y_predict))

    total = 0
    correct_val = 0.0
    f1 = open(args.results, 'w')
    for prediction, truth, question, image in zip(y_predict_text, answers_val,
                                                  questions_val, images_val):
        temp_count = 0
        for _truth in truth.split(';'):
            if prediction == _truth:
                temp_count += 1

        if temp_count > 2:
            correct_val += 1
        else:
            correct_val += float(temp_count) / 3

        total += 1

        f1.write(question.encode('utf-8'))
        f1.write('\n')
        f1.write(image.encode('utf-8'))
        f1.write('\n')
        f1.write(prediction)
        f1.write('\n')
        f1.write(truth.encode('utf-8'))
        f1.write('\n')
        f1.write('\n')

    f1.write('Final Accuracy is ' + str(correct_val / total))
    f1.close()
    f1 = open('../results/overall_results.txt', 'a')
    f1.write(args.weights + '\n')
    f1.write(str(correct_val / total) + '\n\n')
    f1.close()
    print 'Final Accuracy on the validation set is', correct_val / total
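The scoring above mirrors the official VQA accuracy metric: a prediction is fully correct if at least 3 of the human answers match it, and otherwise earns partial credit of matches/3 (Code Example #6 uses the stricter all-or-nothing variant). Factored out as a helper:

def vqa_accuracy(prediction, truth):
    # truth is the ';'-separated list of human answers used above.
    matches = sum(1 for _truth in truth.split(';') if prediction == _truth)
    return min(1.0, matches / 3.0)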
Code Example #10
def main():
    '''
    Before running this demo, ensure that you have some images from the MS COCO
    validation set saved somewhere, and update the image_dir variable accordingly.
    This demo is also designed to run with the models released with the visual-qa
    repo; if you would like to use it with some other model (say an MLP-based
    model or a language-only model), you will have to make some changes.
    '''
    image_dir = '../../vqa_images/'
    local_images = [
        f for f in listdir(image_dir) if isfile(join(image_dir, f))
    ]

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-model',
        type=str,
        default=
        '../models/lstm_1_num_hidden_units_lstm_512_num_hidden_units_mlp_1024_num_hidden_layers_mlp_3_num_hidden_layers_lstm_1.json'
    )
    parser.add_argument(
        '-weights',
        type=str,
        default=
        '../models/lstm_1_num_hidden_units_lstm_512_num_hidden_units_mlp_1024_num_hidden_layers_mlp_3_num_hidden_layers_lstm_1_epoch_199.hdf5'
    )
    parser.add_argument('-sample_size', type=int, default=25)
    args = parser.parse_args()

    model = model_from_json(open(args.model).read())
    model.load_weights(args.weights)
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    print 'Model loaded and compiled'
    images_val = open('../data/preprocessed/images_val2014.txt',
                      'r').read().decode('utf8').splitlines()

    nlp = English()
    print 'Loaded word2vec features'
    labelencoder = joblib.load('../models/labelencoder.pkl')

    vgg_model_path = '../features/coco/vgg_feats.mat'
    features_struct = scipy.io.loadmat(vgg_model_path)
    VGGfeatures = features_struct['feats']
    print 'Loaded vgg features'
    image_ids = open('../features/coco_vgg_IDMap.txt').read().splitlines()
    img_map = {}
    for ids in image_ids:
        id_split = ids.split()
        img_map[id_split[0]] = int(id_split[1])

    image_sample = random.sample(local_images, args.sample_size)

    for image in image_sample:
        p = subprocess.Popen(["display", image_dir + image])
        q = unicode(raw_input("Ask a question about the image:"))
        coco_id = str(int(image[-16:-4]))
        timesteps = len(nlp(q))  # use the question's own length as the number of timesteps
        X_q = get_questions_tensor_timeseries([q], nlp, timesteps)
        X_i = get_images_matrix([coco_id], img_map, VGGfeatures)
        X = [X_q, X_i]
        y_predict = model.predict_classes(X, verbose=0)
        print labelencoder.inverse_transform(y_predict)
        raw_input('Press enter to continue...')
        p.kill()
Code Example #11
File: trainLSTM_1.py Project: iamaaditya/visual-qa
def main():
	parser = argparse.ArgumentParser()
	parser.add_argument('-num_hidden_units_mlp', type=int, default=1024)
	parser.add_argument('-num_hidden_units_lstm', type=int, default=512)
	parser.add_argument('-num_hidden_layers_mlp', type=int, default=3)
	parser.add_argument('-num_hidden_layers_lstm', type=int, default=1)
	parser.add_argument('-dropout', type=float, default=0.5)
	parser.add_argument('-activation_mlp', type=str, default='tanh')
	parser.add_argument('-num_epochs', type=int, default=100)
	parser.add_argument('-model_save_interval', type=int, default=5)
	parser.add_argument('-batch_size', type=int, default=128)
	parser.add_argument('-word_vector', type=str, default='')
	#TODO Feature parser.add_argument('-resume_training', type=str)
	#TODO Feature parser.add_argument('-language_only', type=bool, default= False)
	args = parser.parse_args()

	word_vec_dim= 300
	img_dim = 4096
	max_len = 30
	nb_classes = 1000

	#get the data
	questions_train = open('../data/preprocessed/questions_train2014.txt', 'r').read().decode('utf8').splitlines()
	questions_lengths_train = open('../data/preprocessed/questions_lengths_train2014.txt', 'r').read().decode('utf8').splitlines()
	answers_train = open('../data/preprocessed/answers_train2014_modal.txt', 'r').read().decode('utf8').splitlines()
	images_train = open('../data/preprocessed/images_train2014.txt', 'r').read().decode('utf8').splitlines()
	vgg_model_path = '../features/coco/vgg_feats.mat'

	max_answers = nb_classes
	questions_train, answers_train, images_train = selectFrequentAnswers(questions_train,answers_train,images_train, max_answers)
	questions_lengths_train, questions_train, answers_train, images_train = (list(t) for t in zip(*sorted(zip(questions_lengths_train, questions_train, answers_train, images_train))))

	#encode the remaining answers
	labelencoder = preprocessing.LabelEncoder()
	labelencoder.fit(answers_train)
	nb_classes = len(list(labelencoder.classes_))
	joblib.dump(labelencoder,'../models/labelencoder.pkl')
	
	image_model = Sequential()
	image_model.add(Reshape(input_shape = (img_dim,), dims=(img_dim,)))

	language_model = Sequential()
	if args.num_hidden_layers_lstm == 1:
		language_model.add(LSTM(output_dim = args.num_hidden_units_lstm, return_sequences=False, input_shape=(max_len, word_vec_dim)))
	else:
		language_model.add(LSTM(output_dim = args.num_hidden_units_lstm, return_sequences=True, input_shape=(max_len, word_vec_dim)))
		for i in xrange(args.num_hidden_layers_lstm-2):
			language_model.add(LSTM(output_dim = args.num_hidden_units_lstm, return_sequences=True))
		language_model.add(LSTM(output_dim = args.num_hidden_units_lstm, return_sequences=False))

	model = Sequential()
	model.add(Merge([language_model, image_model], mode='concat', concat_axis=1))
	for i in xrange(args.num_hidden_layers_mlp):
		model.add(Dense(args.num_hidden_units_mlp, init='uniform'))
		model.add(Activation(args.activation_mlp))
		model.add(Dropout(args.dropout))
	model.add(Dense(nb_classes))
	model.add(Activation('softmax'))

	json_string = model.to_json()
	model_file_name = '../models/lstm_1_num_hidden_units_lstm_' + str(args.num_hidden_units_lstm) + \
						'_num_hidden_units_mlp_' + str(args.num_hidden_units_mlp) + '_num_hidden_layers_mlp_' + \
						str(args.num_hidden_layers_mlp) + '_num_hidden_layers_lstm_' + str(args.num_hidden_layers_lstm)
	open(model_file_name + '.json', 'w').write(json_string)

	model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
	print 'Compilation done'

	features_struct = scipy.io.loadmat(vgg_model_path)
	VGGfeatures = features_struct['feats']
	print 'loaded vgg features'
	image_ids = open('../features/coco_vgg_IDMap.txt').read().splitlines()
	img_map = {}
	for ids in image_ids:
		id_split = ids.split()
		img_map[id_split[0]] = int(id_split[1])

	# Code to choose the word vectors, default is Goldberg but GLOVE is preferred
	if args.word_vector == 'glove':
		nlp = spacy.load('en', vectors='en_glove_cc_300_1m_vectors')
	else:
		nlp = English()

	print 'loaded ' + args.word_vector + ' word2vec features...'
	## training
	print 'Training started...'
	for k in xrange(args.num_epochs):

		progbar = generic_utils.Progbar(len(questions_train))

		for qu_batch,an_batch,im_batch in zip(grouper(questions_train, args.batch_size, fillvalue=questions_train[-1]), 
												grouper(answers_train, args.batch_size, fillvalue=answers_train[-1]), 
												grouper(images_train, args.batch_size, fillvalue=images_train[-1])):
			timesteps = len(nlp(qu_batch[-1])) #questions are sorted by length (ascending), so the last one in each batch is the longest
			X_q_batch = get_questions_tensor_timeseries(qu_batch, nlp, timesteps)
			X_i_batch = get_images_matrix(im_batch, img_map, VGGfeatures)
			Y_batch = get_answers_matrix(an_batch, labelencoder)
			loss = model.train_on_batch([X_q_batch, X_i_batch], Y_batch)
			# fix for the Keras v0.3 issue #9
			progbar.add(args.batch_size, values=[("train loss", loss[0])])

		
		if k%args.model_save_interval == 0:
			model.save_weights(model_file_name + '_epoch_{:03d}.hdf5'.format(k))

	model.save_weights(model_file_name + '_epoch_{:03d}.hdf5'.format(k))
Code Example #12
def process_input(self, question):
    return get_questions_tensor_timeseries(question, self._nlp,
                                           self._max_len)
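This is a single method excerpted from a class. A hypothetical minimal wrapper showing how _nlp and _max_len might be wired up; everything except process_input itself is an assumption:

class QuestionProcessor(object):

    def __init__(self, nlp, max_len=30):
        self._nlp = nlp          # a loaded spaCy pipeline, e.g. English()
        self._max_len = max_len  # 30 matches max_len in the training scripts

    def process_input(self, question):
        return get_questions_tensor_timeseries(question, self._nlp,
                                               self._max_len)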
Code Example #13
File: evaluateMLP.py Project: tphiep/visualqa
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-model',
                        type=str,
                        required=True,
                        help="JSON dump of saved model structure.")
    parser.add_argument('-weights',
                        type=str,
                        required=True,
                        help="Saved weights (checkpoint).")
    parser.add_argument('-results',
                        type=str,
                        required=True,
                        help="File where to write the results.")
    parser.add_argument('-results_json',
                        type=str,
                        required=True,
                        help="File where to dump the evaluation results in "
                        "JSON format, so that the official VQA toolkit "
                        "can read it.")
    parser.add_argument('-dataroot', type=str, default='/data/vqa')
    args = parser.parse_args()
    root = args.dataroot

    model = model_from_json(open(args.model).read())
    model.load_weights(args.weights)
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

    questions_val = lines(pjoin(root, 'Preprocessed', 'questions_val2014.txt'))
    questions_id = lines(
        pjoin(root, 'Preprocessed', 'questions_id_val2014.txt'))
    answers_val = lines(pjoin(root, 'Preprocessed', 'answers_val2014_all.txt'))
    images_val = lines(pjoin(root, 'Preprocessed', 'images_val2014_all.txt'))
    vgg_model_path = pjoin(root, 'coco', 'vgg_feats.mat')

    print('Model compiled, weights loaded...')

    # Load the encoder which converts answers to IDs, saved in the same
    # folder as the rest of the dumps.
    exp_root = args.weights[:args.weights.rfind('/')]
    labelencoder = joblib.load(pjoin(exp_root, 'labelencoder.pkl'))

    features_struct = scipy.io.loadmat(vgg_model_path)
    VGGfeatures = features_struct['feats']
    print('loaded vgg features')
    image_ids = lines(pjoin(root, 'coco_vgg_IDMap.txt'))

    img_map = {}
    for ids in image_ids:
        id_split = ids.split()
        img_map[id_split[0]] = int(id_split[1])

    nlp = English()
    print('loaded word2vec features')

    nb_classes = 1000
    y_predict_text = []

    # TODO(andrei): Configure this via args.
    batchSize = 512

    stuff = batchify(batchSize, questions_val, answers_val, images_val)
    with click.progressbar(stuff) as pbar:
        for (qu_batch, an_batch, im_batch) in pbar:
            # TODO(Bernhard): make this choose the right preprocessing and right model,
            # for now you have to plug it in manually
            #X_q_batch = get_questions_matrix_sum(qu_batch, nlp) # for sum up model
            X_q_batch = get_questions_tensor_timeseries(qu_batch, nlp,
                                                        20)  # for LSTM model

            if 'language_only' in args.model:
                y_predict = model.predict_classes([X_q_batch], verbose=0)
            else:
                X_i_batch = get_images_matrix(im_batch, img_map, VGGfeatures)
                y_predict = model.predict_classes([X_q_batch, X_i_batch],
                                                  verbose=0)
            # TODO(Bernhard): verify that predict_classes sets dropout to 0
            y_predict_text.extend(labelencoder.inverse_transform(y_predict))

    correct_val = 0.0
    total = 0
    f1 = open(args.results, 'w')
    print("Will dump resulting answers in JSON format to file: [{0}]".format(
        args.results_json))
    result_file_json = open(args.results_json, 'w')
    result_file_json.write("[")

    all_preds = list(
        zip(y_predict_text, answers_val, questions_val, questions_id,
            images_val))
    for idx, (prediction, truth, question, question_id,
              image) in enumerate(all_preds):
        temp_count = 0
        for _truth in truth.split(';'):
            if prediction == _truth:
                temp_count += 1

        if temp_count > 2:
            correct_val += 1
        else:
            correct_val += float(temp_count) / 3

        total += 1
        f1.write(question)
        f1.write('\n')
        f1.write(image)
        f1.write('\n')
        f1.write(prediction)
        f1.write('\n')
        f1.write(truth)
        f1.write('\n')
        f1.write('\n')

        # Note: Double-braces are escaped braces in Python format strings.
        result_file_json.write(
            '{{"answer": "{0}", "question_id": {1}}}{2}\n'.format(
                prediction, question_id,
                ',' if idx < len(all_preds) - 1 else ''))

    result_file_json.write("]\n")
    f1.write('Final Accuracy is ' + str(correct_val / total))
    f1.close()

    # TODO(andrei): Re-add this, so we are neat about keeping track of all our
    # results.
    # f1 = open('../results/overall_results.txt', 'a')
    # f1.write(args.weights + '\n')
    # f1.write(str(correct_val / total) + '\n')
    # f1.close()
    print('Final Accuracy on the validation set is', correct_val / total)
Code Example #14
def main():

	parser = argparse.ArgumentParser()
	parser.add_argument('-model', type=str, required=True)
	parser.add_argument('-weights', type=str, required=True)
	parser.add_argument('-results', type=str, required=True)
	args = parser.parse_args()

	model = model_from_json(open(args.model).read())
	model.load_weights(args.weights)
	model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

	questions_test = open('../data/preprocessed/questions_test-dev2015.txt', 
						'r').read().decode('utf8').splitlines()
	questions_lengths_test = open('../data/preprocessed/questions_lengths_test-dev2015.txt', 
								'r').read().decode('utf8').splitlines()
	questions_id_test = open('../data/preprocessed/questions_id_test-dev2015.txt', 
								'r').read().decode('utf8').splitlines()
	images_test = open('../data/preprocessed/images_test-dev2015.txt', 
						'r').read().decode('utf8').splitlines()
	vgg_model_path = '../features/coco/vgg_feats_test.mat'
	
	questions_lengths_test, questions_test, images_test, questions_id_test = (list(t) for t in zip(*sorted(zip(questions_lengths_test, questions_test, images_test, questions_id_test))))

	print 'Model compiled, weights loaded'
	labelencoder = joblib.load('../models/labelencoder_trainval.pkl')

	features_struct = scipy.io.loadmat(vgg_model_path)
	VGGfeatures = features_struct['feats']
	print 'Loaded vgg features'
	image_ids = open('../features/coco_vgg_IDMap_test.txt').read().splitlines()
	img_map = {}
	for ids in image_ids:
		id_split = ids.split()
		img_map[id_split[0]] = int(id_split[1])

	nlp = English()
	print 'Loaded word2vec features'

	nb_classes = 1000
	y_predict_text = []
	batchSize = 128
	widgets = ['Evaluating ', Percentage(), ' ', Bar(marker='#',left='[',right=']'),
           ' ', ETA()]
	pbar = ProgressBar(widgets=widgets)

	for qu_batch,im_batch in pbar(zip(grouper(questions_test, batchSize, fillvalue=questions_test[-1]), 
												grouper(images_test, batchSize, fillvalue=images_test[-1]))):
		timesteps = len(nlp(qu_batch[-1])) #questions are sorted by length (ascending), so the last one in each batch is the longest
		X_q_batch = get_questions_tensor_timeseries(qu_batch, nlp, timesteps)
		if 'language_only' in args.model:
			X_batch = X_q_batch
		else:
			X_i_batch = get_images_matrix(im_batch, img_map, VGGfeatures)
			X_batch = [X_q_batch, X_i_batch]
		y_predict = model.predict_classes(X_batch, verbose=0)
		y_predict_text.extend(labelencoder.inverse_transform(y_predict))

	results = []
	
	f1 = open(args.results, 'w')
	for prediction, question, question_id, image in zip(y_predict_text, questions_test, questions_id_test, images_test):
		answer = {}
		answer['question_id'] = int(question_id)
		answer['answer'] = prediction
		results.append(answer)

		f1.write(question.encode('utf-8'))
		f1.write('\n')
		f1.write(image.encode('utf-8'))
		f1.write('\n')
		f1.write(prediction)
		f1.write('\n')
		f1.write(question_id.encode('utf-8'))
		f1.write('\n')
		f1.write('\n')

	f1.close()

	f2 = open('../results/submission_test-dev2015.json', 'w')
	f2.write(json.dumps(results))
	f2.close()
	print 'Results saved to', args.results
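The submission file written above is a JSON array of objects with exactly the keys 'answer' and 'question_id', the same shape Code Example #13 emits by hand for the official VQA toolkit. An illustration (the values are made up; only the two keys come from the code):

import json

example_results = [
    {'question_id': 1, 'answer': 'yes'},
    {'question_id': 2, 'answer': '2'},
]
print json.dumps(example_results)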
Code Example #15
File: evaluateLSTM.py Project: fireae/visual-qa
def main():

    parser = argparse.ArgumentParser()
    parser.add_argument("-model", type=str, required=True)
    parser.add_argument("-weights", type=str, required=True)
    parser.add_argument("-results", type=str, required=True)
    args = parser.parse_args()

    model = model_from_json(open(args.model).read())
    model.load_weights(args.weights)
    model.compile(loss="categorical_crossentropy", optimizer="rmsprop")

    questions_val = open("../data/preprocessed/questions_val2014.txt", "r").read().decode("utf8").splitlines()
    questions_lengths_val = (
        open("../data/preprocessed/questions_lengths_val2014.txt", "r").read().decode("utf8").splitlines()
    )
    answers_val = open("../data/preprocessed/answers_val2014.txt", "r").read().decode("utf8").splitlines()
    images_val = open("../data/preprocessed/images_val2014.txt", "r").read().decode("utf8").splitlines()
    vgg_model_path = "../features/coco/vgg_feats.mat"

    questions_lengths_val, questions_val, answers_val, images_val = (
        list(t) for t in zip(*sorted(zip(questions_lengths_val, questions_val, answers_val, images_val)))
    )

    print "Model compiled, weights loaded"
    labelencoder = joblib.load("../models/labelencoder.pkl")

    features_struct = scipy.io.loadmat(vgg_model_path)
    VGGfeatures = features_struct["feats"]
    print "Loaded vgg features"
    image_ids = open("../features/coco/coco_vgg_IDMap.txt").read().splitlines()
    img_map = {}
    for ids in image_ids:
        id_split = ids.split()
        img_map[id_split[0]] = int(id_split[1])

    nlp = English()
    print "Loaded word2vec features"

    nb_classes = 1000
    y_predict_text = []
    batchSize = 128
    widgets = ["Evaluating ", Percentage(), " ", Bar(marker="#", left="[", right="]"), " ", ETA()]
    pbar = ProgressBar(widgets=widgets)

    for qu_batch, an_batch, im_batch in pbar(
        zip(
            grouper(questions_val, batchSize, fillvalue=questions_val[0]),
            grouper(answers_val, batchSize, fillvalue=answers_val[0]),
            grouper(images_val, batchSize, fillvalue=images_val[0]),
        )
    ):
        timesteps = len(nlp(qu_batch[-1]))  # questions are sorted by length (ascending), so the last one in each batch is the longest
        X_q_batch = get_questions_tensor_timeseries(qu_batch, nlp, timesteps)
        if "language_only" in args.model:
            X_batch = X_q_batch
        else:
            X_i_batch = get_images_matrix(im_batch, img_map, VGGfeatures)
            X_batch = [X_q_batch, X_i_batch]
        y_predict = model.predict_classes(X_batch, verbose=0)
        y_predict_text.extend(labelencoder.inverse_transform(y_predict))

    incorrect_val = 0
    correct_val = 0
    f1 = open(args.results, "w")
    for prediction, truth, question, image in zip(y_predict_text, answers_val, questions_val, images_val):
        temp_count = 0
        for _truth in truth.split(";"):
            if prediction == _truth:
                temp_count += 1

        if temp_count > 2:
            correct_val += 1
        else:
            incorrect_val += 1

        f1.write(question.encode("utf-8"))
        f1.write("\n")
        f1.write(image.encode("utf-8"))
        f1.write("\n")
        f1.write(prediction)
        f1.write("\n")
        f1.write(truth.encode("utf-8"))
        f1.write("\n")
        f1.write("\n")

    f1.write("Final Accuracy is " + str(float(correct_val) / (incorrect_val + correct_val)))
    f1.close()
    f1 = open("../results/overall_results.txt", "a")
    f1.write(args.weights + "\n")
    f1.write(str(float(correct_val) / (incorrect_val + correct_val)) + "\n\n")
    f1.close()
    print "Final Accuracy on the validation set is", float(correct_val) / (incorrect_val + correct_val)