def save_pickle():
    
    answers = open('../data/ans_txt.txt', 'r').read().decode('utf8').splitlines()
    print(len(answers))
    questions = open("../data/ques_txt.txt", 'r').read().decode('utf8').splitlines()
    images = open("../data/img_id_txt.txt", 'r').read().decode('utf8').splitlines()
    maxAnswers = 1000
    questions, answers, images = selectFrequentAnswers(questions,answers,images, maxAnswers)
    
    encoder = preprocessing.LabelEncoder()
    encoder.fit(answers)
    print ("Number of classes: " + str(len(list(encoder.classes_))))
    joblib.dump(encoder, '../data/encoder.pkl')
    return "DONE"
def main():
	parser = argparse.ArgumentParser()
	parser.add_argument('-num_hidden_units', type=int, default=512)
	parser.add_argument('-num_lstm_layers', type=int, default=2)
	parser.add_argument('-dropout', type=float, default=0.2)
	parser.add_argument('-activation', type=str, default='tanh')
	parser.add_argument('-num_epochs', type=int, default=100)
	parser.add_argument('-model_save_interval', type=int, default=5)
	parser.add_argument('-batch_size', type=int, default=128)
	parser.add_argument('-word_vector', type=str, default='')
	args = parser.parse_args()

	questions_train = open('../data/preprocessed/questions_train2014.txt', 'r').read().decode('utf8').splitlines()
	questions_lengths_train = open('../data/preprocessed/questions_lengths_train2014.txt', 'r').read().decode('utf8').splitlines()
	answers_train = open('../data/preprocessed/answers_train2014.txt', 'r').read().decode('utf8').splitlines()
	images_train = open('../data/preprocessed/images_train2014.txt', 'r').read().decode('utf8').splitlines()
	max_answers = 1000
	questions_train, answers_train, images_train = selectFrequentAnswers(questions_train,answers_train,images_train, max_answers)

	print 'Loaded questions, sorting by length...'
	questions_lengths_train, questions_train, answers_train = (list(t) for t in zip(*sorted(zip(map(int, questions_lengths_train), questions_train, answers_train))))  # cast lengths to int so '9' sorts before '10'
	
	#encode the remaining answers
	labelencoder = preprocessing.LabelEncoder()
	labelencoder.fit(answers_train)
	nb_classes = len(list(labelencoder.classes_))
	joblib.dump(labelencoder,'../models/labelencoder.pkl')
	max_len = 30 #25 is max for training, 27 is max for validation
	word_vec_dim = 300

	model = Sequential()
	model.add(LSTM(output_dim = args.num_hidden_units, activation=args.activation,
			return_sequences=True, input_shape=(max_len, word_vec_dim)))
	model.add(Dropout(args.dropout))
	model.add(LSTM(args.num_hidden_units, return_sequences=False))
	model.add(Dense(nb_classes, init='uniform'))
	model.add(Activation('softmax'))

	json_string = model.to_json()
	model_file_name = '../models/lstm_language_only_num_hidden_units_' + str(args.num_hidden_units) + '_num_lstm_layers_' + str(args.num_lstm_layers) + '_dropout_' + str(args.dropout)
	open(model_file_name  + '.json', 'w').write(json_string)
	
	print 'Compiling model...'
	model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
	print 'Compilation done...'

	#set up word vectors
	# Choose the word vectors: the default is spaCy's built-in (Goldberg) word2vec vectors, but GloVe is preferred
	if args.word_vector == 'glove':
		nlp = spacy.load('en', vectors='en_glove_cc_300_1m_vectors')
	else:
		nlp = English()

	print 'loaded ' + args.word_vector + ' word2vec features...'

	## training
	# A few hyperparameters (num_epochs, batch_size, model_save_interval) were moved into the argparse flags above
	print 'Training started...'
	for k in xrange(args.num_epochs):

		progbar = generic_utils.Progbar(len(questions_train))

		for qu_batch,an_batch,im_batch in zip(grouper(questions_train, args.batch_size, fillvalue=questions_train[0]), 
												grouper(answers_train, args.batch_size, fillvalue=answers_train[0]), 
												grouper(images_train, args.batch_size, fillvalue=images_train[0])):
			timesteps = len(nlp(qu_batch[-1])) #questions are sorted by length, so the last one in the batch is the longest
			X_q_batch = get_questions_tensor_timeseries(qu_batch, nlp, timesteps)
			Y_batch = get_answers_matrix(an_batch, labelencoder)
			loss = model.train_on_batch(X_q_batch, Y_batch)
			# fix for the Keras v0.3 issue #9
			progbar.add(args.batch_size, values=[("train loss", loss[0])])

		
		if k%args.model_save_interval == 0:
			model.save_weights(model_file_name + '_epoch_{:02d}.hdf5'.format(k))

	model.save_weights(model_file_name + '_epoch_{:02d}.hdf5'.format(k+1))
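grouper, used in the batching loop above, is almost certainly the standard itertools "grouper" recipe: it chunks an iterable into fixed-size batches, padding the final short batch with fillvalue so that every batch has exactly batch_size elements.

from itertools import izip_longest  # itertools.zip_longest on Python 3

def grouper(iterable, n, fillvalue=None):
    # grouper('ABCDEFG', 3, 'x') --> ABC DEF Gxx
    args = [iter(iterable)] * n
    return izip_longest(fillvalue=fillvalue, *args)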
Example #3
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-num_hidden_units_mlp', type=int, default=1024)
    parser.add_argument('-num_hidden_units_lstm', type=int, default=512)
    parser.add_argument('-num_hidden_layers_mlp', type=int, default=3)
    parser.add_argument('-num_hidden_layers_lstm', type=int, default=1)
    parser.add_argument('-dropout', type=float, default=0.5)
    parser.add_argument('-activation_mlp', type=str, default='tanh')
    parser.add_argument('-num_epochs', type=int, default=100)
    parser.add_argument('-model_save_interval', type=int, default=5)
    parser.add_argument('-batch_size', type=int, default=128)
    #TODO Feature parser.add_argument('-resume_training', type=str)
    #TODO Feature parser.add_argument('-language_only', type=bool, default= False)
    args = parser.parse_args()

    word_vec_dim = 300
    img_dim = 4096
    max_len = 30
    nb_classes = 1000

    #get the data
    questions_train = open('../data/preprocessed/questions_train2014.txt',
                           'r').read().decode('utf8').splitlines()
    questions_lengths_train = open(
        '../data/preprocessed/questions_lengths_train2014.txt',
        'r').read().decode('utf8').splitlines()
    answers_train = open('../data/preprocessed/answers_train2014.txt',
                         'r').read().decode('utf8').splitlines()
    images_train = open('../data/preprocessed/images_train2014.txt',
                        'r').read().decode('utf8').splitlines()
    vgg_model_path = '../features/coco/vgg_feats.mat'

    max_answers = nb_classes
    questions_train, answers_train, images_train = selectFrequentAnswers(
        questions_train, answers_train, images_train, max_answers)
    questions_lengths_train, questions_train, answers_train, images_train = (
        list(t) for t in zip(*sorted(
            zip(map(int, questions_lengths_train), questions_train,
                answers_train, images_train))))  # cast lengths to int so '9' sorts before '10'

    #encode the remaining answers
    labelencoder = preprocessing.LabelEncoder()
    labelencoder.fit(answers_train)
    nb_classes = len(list(labelencoder.classes_))
    joblib.dump(labelencoder, '../models/labelencoder.pkl')

    image_model = Sequential()
    image_model.add(Reshape(input_shape=(img_dim, ), dims=(img_dim, )))

    language_model = Sequential()
    if args.num_hidden_layers_lstm == 1:
        language_model.add(
            LSTM(output_dim=args.num_hidden_units_lstm,
                 return_sequences=False,
                 input_shape=(max_len, word_vec_dim)))
    else:
        language_model.add(
            LSTM(output_dim=args.num_hidden_units_lstm,
                 return_sequences=True,
                 input_shape=(max_len, word_vec_dim)))
        for i in xrange(args.num_hidden_layers_lstm - 2):
            language_model.add(
                LSTM(output_dim=args.num_hidden_units_lstm,
                     return_sequences=True))
        language_model.add(
            LSTM(output_dim=args.num_hidden_units_lstm,
                 return_sequences=False))

    model = Sequential()
    model.add(
        Merge([language_model, image_model], mode='concat', concat_axis=1))
    for i in xrange(args.num_hidden_layers_mlp):
        model.add(Dense(args.num_hidden_units_mlp, init='uniform'))
        model.add(Activation(args.activation_mlp))
        model.add(Dropout(args.dropout))
    model.add(Dense(nb_classes))
    model.add(Activation('softmax'))

    json_string = model.to_json()
    model_file_name = '../models/lstm_1_num_hidden_units_lstm_' + str(args.num_hidden_units_lstm) + \
         '_num_hidden_units_mlp_' + str(args.num_hidden_units_mlp) + '_num_hidden_layers_mlp_' + \
         str(args.num_hidden_layers_mlp) + '_num_hidden_layers_lstm_' + str(args.num_hidden_layers_lstm)
    open(model_file_name + '.json', 'w').write(json_string)

    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    print 'Compilation done'

    features_struct = scipy.io.loadmat(vgg_model_path)
    VGGfeatures = features_struct['feats']
    print 'loaded vgg features'
    image_ids = open('../features/coco_vgg_IDMap.txt').read().splitlines()
    img_map = {}
    for ids in image_ids:
        id_split = ids.split()
        img_map[id_split[0]] = int(id_split[1])

    nlp = English()
    print 'loaded word2vec features...'
    ## training
    print 'Training started...'
    for k in xrange(args.num_epochs):

        progbar = generic_utils.Progbar(len(questions_train))

        for qu_batch, an_batch, im_batch in zip(
                grouper(questions_train,
                        args.batch_size,
                        fillvalue=questions_train[-1]),
                grouper(answers_train,
                        args.batch_size,
                        fillvalue=answers_train[-1]),
                grouper(images_train,
                        args.batch_size,
                        fillvalue=images_train[-1])):
            timesteps = len(nlp(
                qu_batch[-1]))  #questions are sorted by length, so the last one in the batch is the longest
            X_q_batch = get_questions_tensor_timeseries(
                qu_batch, nlp, timesteps)
            X_i_batch = get_images_matrix(im_batch, img_map, VGGfeatures)
            Y_batch = get_answers_matrix(an_batch, labelencoder)
            loss = model.train_on_batch([X_q_batch, X_i_batch], Y_batch)
            progbar.add(args.batch_size, values=[("train loss", loss)])

        if k % args.model_save_interval == 0:
            model.save_weights(model_file_name +
                               '_epoch_{:03d}.hdf5'.format(k))

    model.save_weights(model_file_name + '_epoch_{:03d}.hdf5'.format(k))
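The feature helpers (get_questions_tensor_timeseries, get_images_matrix, get_answers_matrix) come from the project's feature-extraction module and are not reproduced in this listing. As a rough sketch, and assuming zero-padding, get_questions_tensor_timeseries maps a batch of questions to a (batch_size, timesteps, 300) tensor of per-token spaCy word vectors:

import numpy as np

def get_questions_tensor_timeseries(questions, nlp, timesteps):
    # One 300-d spaCy vector per token; shorter questions are zero-padded,
    # longer ones truncated to `timesteps`.
    word_vec_dim = 300
    tensor = np.zeros((len(questions), timesteps, word_vec_dim))
    for i, q in enumerate(questions):
        tokens = nlp(q)
        for j in range(min(len(tokens), timesteps)):
            tensor[i, j, :] = tokens[j].vector  # token.repvec on very old spaCy
    return tensor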
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-num_hidden_units', type=int, default=512)
    parser.add_argument('-num_lstm_layers', type=int, default=2)
    parser.add_argument('-dropout', type=float, default=0.2)
    parser.add_argument('-activation', type=str, default='tanh')
    args = parser.parse_args()

    questions_train = open('../data/preprocessed/questions_train2014.txt',
                           'r').read().decode('utf8').splitlines()
    questions_lengths_train = open(
        '../data/preprocessed/questions_lengths_train2014.txt',
        'r').read().decode('utf8').splitlines()
    answers_train = open('../data/preprocessed/answers_train2014.txt',
                         'r').read().decode('utf8').splitlines()
    images_train = open('../data/preprocessed/images_train2014.txt',
                        'r').read().decode('utf8').splitlines()
    max_answers = 1000
    questions_train, answers_train, images_train = selectFrequentAnswers(
        questions_train, answers_train, images_train, max_answers)

    print 'Loaded questions, sorting by length...'
    questions_lengths_train, questions_train, answers_train = (
        list(t) for t in zip(*sorted(
            zip(map(int, questions_lengths_train), questions_train,
                answers_train))))  # cast lengths to int so '9' sorts before '10'

    #encode the remaining answers
    labelencoder = preprocessing.LabelEncoder()
    labelencoder.fit(answers_train)
    nb_classes = len(list(labelencoder.classes_))
    joblib.dump(labelencoder, '../models/labelencoder.pkl')
    max_len = 30  #25 is max for training, 27 is max for validation
    word_vec_dim = 300

    model = Sequential()
    model.add(
        LSTM(output_dim=args.num_hidden_units,
             activation=args.activation,
             return_sequences=True,
             input_shape=(max_len, word_vec_dim)))
    model.add(Dropout(args.dropout))
    model.add(LSTM(args.num_hidden_units, return_sequences=False))
    model.add(Dense(nb_classes, init='uniform'))
    model.add(Activation('softmax'))

    json_string = model.to_json()
    model_file_name = '../models/lstm_language_only_num_hidden_units_' + str(
        args.num_hidden_units) + '_num_lstm_layers_' + str(
            args.num_lstm_layers) + '_dropout_' + str(args.dropout)
    open(model_file_name + '.json', 'w').write(json_string)

    print 'Compiling model...'
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    print 'Compilation done...'

    #set up word vectors
    nlp = English()
    print 'loaded word2vec features...'

    ## training
    print 'Training started...'
    numEpochs = 100
    model_save_interval = 5
    batchSize = 128
    for k in xrange(numEpochs):

        progbar = generic_utils.Progbar(len(questions_train))

        for qu_batch, an_batch, im_batch in zip(
                grouper(questions_train,
                        batchSize,
                        fillvalue=questions_train[0]),
                grouper(answers_train, batchSize, fillvalue=answers_train[0]),
                grouper(images_train, batchSize, fillvalue=images_train[0])):
            timesteps = len(nlp(
                qu_batch[-1]))  #questions are sorted by length, so the last one in the batch is the longest
            X_q_batch = get_questions_tensor_timeseries(
                qu_batch, nlp, timesteps)
            Y_batch = get_answers_matrix(an_batch, labelencoder)
            loss = model.train_on_batch(X_q_batch, Y_batch)
            progbar.add(batchSize, values=[("train loss", loss)])

        if k % model_save_interval == 0:
            model.save_weights(model_file_name +
                               '_epoch_{:02d}.hdf5'.format(k))

    model.save_weights(model_file_name + '_epoch_{:02d}.hdf5'.format(k + 1))
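get_answers_matrix presumably pairs the fitted LabelEncoder with Keras' to_categorical to turn a batch of answer strings into one-hot rows; a sketch under that assumption:

from keras.utils import np_utils

def get_answers_matrix(answers, encoder):
    # String labels -> integer ids -> one-hot matrix of width nb_classes.
    y = encoder.transform(answers)
    nb_classes = len(encoder.classes_)
    return np_utils.to_categorical(y, nb_classes)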
Example #5
def main():

    parser = argparse.ArgumentParser()
    parser.add_argument('-num_hidden_units', type=int, default=1024)
    parser.add_argument('-num_hidden_layers', type=int, default=3)
    parser.add_argument('-dropout', type=float, default=0.5)
    parser.add_argument('-activation', type=str, default='tanh')
    parser.add_argument('-language_only', type=bool, default=False)
    parser.add_argument('-num_epochs', type=int, default=10)
    parser.add_argument('-model_save_interval', type=int, default=10)
    parser.add_argument('-batch_size', type=int, default=128)
    args = parser.parse_args()

    questions_train = open('../data/preprocessed/questions_train2014.txt',
                           'r').read().decode('utf8').splitlines()
    answers_train = open('../data/preprocessed/answers_train2014_modal.txt',
                         'r').read().decode('utf8').splitlines()
    images_train = open('../data/preprocessed/images_train2014.txt',
                        'r').read().decode('utf8').splitlines()
    vgg_model_path = '../features/coco/vgg_feats.mat'
    maxAnswers = 1000
    questions_train, answers_train, images_train = selectFrequentAnswers(
        questions_train, answers_train, images_train, maxAnswers)

    #encode the remaining answers
    labelencoder = preprocessing.LabelEncoder()
    labelencoder.fit(answers_train)
    nb_classes = len(list(labelencoder.classes_))
    joblib.dump(labelencoder, '../models/labelencoder.pkl')

    features_struct = scipy.io.loadmat(vgg_model_path)
    VGGfeatures = features_struct['feats']
    print 'loaded vgg features'
    image_ids = open('../features/coco_vgg_IDMap.txt').read().splitlines()
    id_map = {}
    for ids in image_ids:
        id_split = ids.split()
        id_map[id_split[0]] = int(id_split[1])

    nlp = English()
    print 'loaded word2vec features...'
    img_dim = 4096
    word_vec_dim = 300

    model = Sequential()
    if args.language_only:
        model.add(
            Dense(args.num_hidden_units,
                  input_dim=word_vec_dim,
                  init='uniform'))
    else:
        model.add(
            Dense(args.num_hidden_units,
                  input_dim=img_dim + word_vec_dim,
                  init='uniform'))
    model.add(Activation(args.activation))
    if args.dropout > 0:
        model.add(Dropout(args.dropout))
    for i in xrange(args.num_hidden_layers - 1):
        model.add(Dense(args.num_hidden_units, init='uniform'))
        model.add(Activation(args.activation))
        if args.dropout > 0:
            model.add(Dropout(args.dropout))
    model.add(Dense(nb_classes, init='uniform'))
    model.add(Activation('softmax'))

    json_string = model.to_json()
    if args.language_only:
        model_file_name = '../models/mlp_language_only_num_hidden_units_' + str(
            args.num_hidden_units) + '_num_hidden_layers_' + str(
                args.num_hidden_layers)
    else:
        model_file_name = '../models/mlp_num_hidden_units_' + str(
            args.num_hidden_units) + '_num_hidden_layers_' + str(
                args.num_hidden_layers)
    open(model_file_name + '.json', 'w').write(json_string)

    print 'Compiling model...'
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    print 'Compilation done...'

    print 'Training started...'
    for k in xrange(args.num_epochs):
        #shuffle the data points before going through them
        index_shuf = range(len(questions_train))
        shuffle(index_shuf)
        questions_train = [questions_train[i] for i in index_shuf]
        answers_train = [answers_train[i] for i in index_shuf]
        images_train = [images_train[i] for i in index_shuf]
        progbar = generic_utils.Progbar(len(questions_train))
        for qu_batch, an_batch, im_batch in zip(
                grouper(questions_train,
                        args.batch_size,
                        fillvalue=questions_train[-1]),
                grouper(answers_train,
                        args.batch_size,
                        fillvalue=answers_train[-1]),
                grouper(images_train,
                        args.batch_size,
                        fillvalue=images_train[-1])):
            X_q_batch = get_questions_matrix_sum(qu_batch, nlp)
            if args.language_only:
                X_batch = X_q_batch
            else:
                X_i_batch = get_images_matrix(im_batch, id_map, VGGfeatures)
                X_batch = np.hstack((X_q_batch, X_i_batch))
            Y_batch = get_answers_matrix(an_batch, labelencoder)
            loss = model.train_on_batch(X_batch, Y_batch)
            progbar.add(args.batch_size, values=[("train loss", loss)])
        #print type(loss)
        if k % args.model_save_interval == 0:
            model.save_weights(model_file_name +
                               '_epoch_{:02d}.hdf5'.format(k))

    model.save_weights(model_file_name + '_epoch_{:02d}.hdf5'.format(k))
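A caveat that applies to the -language_only flag here and in the following snippets: argparse's type=bool does not actually parse booleans — bool('False') is True, so any non-empty value on the command line enables language-only mode. A safer pattern is a small string-to-bool converter (str2bool is an illustrative name, not part of the repo):

def str2bool(v):
    # argparse hands us the raw string; map the usual spellings explicitly.
    return str(v).lower() in ('yes', 'true', 't', '1')

parser.add_argument('-language_only', type=str2bool, default=False)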
Example #6
def main():

	parser = argparse.ArgumentParser()
	parser.add_argument('-num_hidden_units', type=int, default=1024)
	parser.add_argument('-num_hidden_layers', type=int, default=3)
	parser.add_argument('-dropout', type=float, default=0.5)
	parser.add_argument('-activation', type=str, default='tanh')
	parser.add_argument('-language_only', type=bool, default= False)
	parser.add_argument('-num_epochs', type=int, default=100)
	parser.add_argument('-model_save_interval', type=int, default=10)
	parser.add_argument('-batch_size', type=int, default=128)
	parser.add_argument('-word_vector', type=str, default='')
	args = parser.parse_args()

	questions_train = open('../data/preprocessed/questions_train2014.txt', 'r').read().decode('utf8').splitlines()
	answers_train = open('../data/preprocessed/answers_train2014_modal.txt', 'r').read().decode('utf8').splitlines()
	images_train = open('../data/preprocessed/images_train2014.txt', 'r').read().decode('utf8').splitlines()
	vgg_model_path = '../features/coco/vgg_feats.mat'
	maxAnswers = 1000
	questions_train, answers_train, images_train = selectFrequentAnswers(questions_train,answers_train,images_train, maxAnswers)

	#encode the remaining answers
	labelencoder = preprocessing.LabelEncoder()
	labelencoder.fit(answers_train)
	nb_classes = len(list(labelencoder.classes_))
	joblib.dump(labelencoder,'../models/labelencoder.pkl')

	features_struct = scipy.io.loadmat(vgg_model_path)
	VGGfeatures = features_struct['feats']
	print 'loaded vgg features'
	image_ids = open('../features/coco_vgg_IDMap.txt').read().splitlines()
	id_map = {}
	for ids in image_ids:
		id_split = ids.split()
		id_map[id_split[0]] = int(id_split[1])

	# Choose the word vectors: the default is spaCy's built-in (Goldberg) word2vec vectors, but GloVe is preferred
	if args.word_vector == 'glove':
		nlp = spacy.load('en', vectors='en_glove_cc_300_1m_vectors')
	else:
		nlp = English()

	print 'loaded ' + args.word_vector + ' word2vec features...'
	img_dim = 4096
	word_vec_dim = 300

	model = Sequential()
	if args.language_only:
		model.add(Dense(args.num_hidden_units, input_dim=word_vec_dim, init='uniform'))
	else:
		model.add(Dense(args.num_hidden_units, input_dim=img_dim+word_vec_dim, init='uniform'))
	model.add(Activation(args.activation))
	if args.dropout>0:
		model.add(Dropout(args.dropout))
	for i in xrange(args.num_hidden_layers-1):
		model.add(Dense(args.num_hidden_units, init='uniform'))
		model.add(Activation(args.activation))
		if args.dropout>0:
			model.add(Dropout(args.dropout))
	model.add(Dense(nb_classes, init='uniform'))
	model.add(Activation('softmax'))

	json_string = model.to_json()
	if args.language_only:
		model_file_name = '../models/mlp_language_only_num_hidden_units_' + str(args.num_hidden_units) + '_num_hidden_layers_' + str(args.num_hidden_layers)
	else:
		model_file_name = '../models/mlp_num_hidden_units_' + str(args.num_hidden_units) + '_num_hidden_layers_' + str(args.num_hidden_layers)		
	open(model_file_name  + '.json', 'w').write(json_string)

	print 'Compiling model...'
	model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
	print 'Compilation done...'
	
	print 'Training started...'
	for k in xrange(args.num_epochs):
		#shuffle the data points before going through them
		index_shuf = range(len(questions_train))
		shuffle(index_shuf)
		questions_train = [questions_train[i] for i in index_shuf]
		answers_train = [answers_train[i] for i in index_shuf]
		images_train = [images_train[i] for i in index_shuf]
		progbar = generic_utils.Progbar(len(questions_train))
		for qu_batch,an_batch,im_batch in zip(grouper(questions_train, args.batch_size, fillvalue=questions_train[-1]), 
											grouper(answers_train, args.batch_size, fillvalue=answers_train[-1]), 
											grouper(images_train, args.batch_size, fillvalue=images_train[-1])):
			X_q_batch = get_questions_matrix_sum(qu_batch, nlp)
			if args.language_only:
				X_batch = X_q_batch
			else:
				X_i_batch = get_images_matrix(im_batch, id_map, VGGfeatures)
				X_batch = np.hstack((X_q_batch, X_i_batch))
			Y_batch = get_answers_matrix(an_batch, labelencoder)
			loss = model.train_on_batch(X_batch, Y_batch)
			# fix for the Keras v0.3 issue #9
			progbar.add(args.batch_size, values=[("train loss", loss[0])])
		#print type(loss)
		if k%args.model_save_interval == 0:
			model.save_weights(model_file_name + '_epoch_{:02d}.hdf5'.format(k))

	model.save_weights(model_file_name + '_epoch_{:02d}.hdf5'.format(k))
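Unlike the LSTM scripts, the MLP scripts collapse each question into a single 300-d vector with get_questions_matrix_sum. A sketch, assuming it simply sums the spaCy word vectors of the question's tokens:

import numpy as np

def get_questions_matrix_sum(questions, nlp):
    # Bag-of-vectors: sum the 300-d word vectors of each question's tokens.
    word_vec_dim = 300
    matrix = np.zeros((len(questions), word_vec_dim))
    for i, q in enumerate(questions):
        for token in nlp(q):
            matrix[i, :] += token.vector
    return matrix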
Example #7
def main():
	cwd = os.getcwd()

	parser = argparse.ArgumentParser()
	parser.add_argument('-num_hidden_units', type=int, default=1024)
	parser.add_argument('-num_hidden_layers', type=int, default=3)
	parser.add_argument('-dropout', type=float, default=0.5)
	parser.add_argument('-activation', type=str, default='tanh')
	parser.add_argument('-language_only', type=bool, default= False)
	parser.add_argument('-num_epochs', type=int, default=2)
	parser.add_argument('-model_save_interval', type=int, default=10)
	parser.add_argument('-model_weights_path', type=str, default=cwd+'/vgg/vgg16_weights.h5')
	parser.add_argument('-batch_size', type=int, default=128)
	parser.add_argument('-questions_train',type=str, default = cwd+'/data/preprocessed/questions_train2015.txt')
	parser.add_argument('-answers_train',type=str, default = cwd+'/data/preprocessed/answers_train2015_modal.txt')
	parser.add_argument('-im_dir',type=str, default =cwd+'/data/preprocessed/scene_img_abstract_v002_train2015/')
	#parser.add_argument('-questions_train',type=str, default = cwd+'/data/preprocessed/questions_train2014.txt')
	args = parser.parse_args()

	questions_train = open(args.questions_train, 'r').read().decode('utf8').splitlines()
	answers_train = open(args.answers_train, 'r').read().decode('utf8').splitlines()
	images_train = open(cwd+'/data/preprocessed/images_train2015.txt', 'r').read().decode('utf8').splitlines()
	#vgg_model_path = cwd+'/features/coco/vgg_feats.mat' #this needs to change
	maxAnswers = 100
	questions_train, answers_train, images_train = selectFrequentAnswers(questions_train,answers_train,images_train, maxAnswers)

	#encode the remaining answers
	labelencoder = preprocessing.LabelEncoder()
	labelencoder.fit(answers_train)
	nb_classes = len(list(labelencoder.classes_))
	joblib.dump(labelencoder,cwd+'/models/labelencoder.pkl')

	#features_struct = scipy.io.loadmat(vgg_model_path)
	#VGGfeatures = features_struct['feats']
	# print 'loaded vgg features'
	# image_ids = open(cwd+'/features/coco_vgg_IDMap.txt').read().splitlines()
	# id_map = {}
	# for ids in image_ids:
	# 	id_split = ids.split()
	# 	id_map[id_split[0]] = int(id_split[1])

	vgg_model = vgg16.VGG_16(args.model_weights_path)
	sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
	vgg_model.compile(optimizer=sgd, loss='categorical_crossentropy')
	print 'loaded vgg model...'

	nlp = English()
	print 'loaded word2vec features...'

	img_dim = 4096
	word_vec_dim = 300

	model = Sequential()
	if args.language_only:
		model.add(Dense(args.num_hidden_units, input_dim=word_vec_dim, init='uniform'))
	else:
		model.add(Dense(args.num_hidden_units, input_dim=img_dim+word_vec_dim, init='uniform'))
	model.add(Activation(args.activation))
	if args.dropout>0:
		model.add(Dropout(args.dropout))
	for i in xrange(args.num_hidden_layers-1):
		model.add(Dense(args.num_hidden_units, init='uniform'))
		model.add(Activation(args.activation))
		if args.dropout>0:
			model.add(Dropout(args.dropout))
	model.add(Dense(nb_classes, init='uniform'))
	model.add(Activation('softmax'))

	json_string = model.to_json()
	model_file_name = cwd+'/models/mlp_num_hidden_units_' + str(args.num_hidden_units) + '_num_hidden_layers_' + str(args.num_hidden_layers)		
	open(model_file_name  + '.json', 'w').write(json_string)
	
	print 'Training started...'
	id_map = {}
	f1 = open('abstract_image_precompute')
	f2 = open('abstract_image_precompute_reverse')
	VGGfeatures = np.loadtxt(f1)
	VGGfeatures_reverse = np.loadtxt(f2)
	f1.close()
	f2.close()
	for k in xrange(args.num_epochs):
		#shuffle the data points before going through them
		index_shuf = range(len(questions_train))
		shuffle(index_shuf)
		questions_train = [questions_train[i] for i in index_shuf]
		answers_train = [answers_train[i] for i in index_shuf]
		images_train = [images_train[i] for i in index_shuf]
		progbar = generic_utils.Progbar(len(questions_train))
		for qu_batch,an_batch,im_batch in zip(grouper(questions_train, args.batch_size, fillvalue=questions_train[-1]), 
											grouper(answers_train, args.batch_size, fillvalue=answers_train[-1]), 
											grouper(images_train, args.batch_size, fillvalue=images_train[-1])):
			
			X_q_batch = get_questions_matrix_sum(qu_batch, nlp)
			im_path = args.im_dir +"abstract_v002_train2015_"
			print 'getting image features...'
			X_i_batch = get_images_matrix(im_batch, VGGfeatures, VGGfeatures_reverse)
			# X_i_batch = get_images_matrix_from_model(vgg_model, im_batch, im_path, id_map)
			X_batch = np.hstack((X_q_batch, X_i_batch))

			Y_batch = get_answers_matrix(an_batch, labelencoder)
			print 'running training on batch...'
			
			loss = model.train_on_batch(X_batch, Y_batch)
			
			progbar.add(args.batch_size, values=[("train loss", loss)])

		if k%args.model_save_interval == 0:
			model.save_weights(model_file_name + '_epoch_{:02d}.hdf5'.format(k))
	model.save_weights(model_file_name + '_epoch_{:02d}.hdf5'.format(k))
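Most of the snippets call get_images_matrix(im_batch, img_map, VGGfeatures); the snippet above passes a different, precomputed pair of arguments to a modified variant. The common version plausibly looks up one 4096-d VGG feature vector per COCO image id — vgg_feats.mat stores features as a (4096, num_images) matrix, so the lookup is by column:

import numpy as np

def get_images_matrix(img_ids, img_map, VGGfeatures):
    # img_map maps a COCO image id string to its column in the feature matrix.
    img_dim = VGGfeatures.shape[0]  # 4096
    matrix = np.zeros((len(img_ids), img_dim))
    for i, img_id in enumerate(img_ids):
        matrix[i, :] = VGGfeatures[:, img_map[img_id]]
    return matrix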
Example #8
def main():
	parser = argparse.ArgumentParser()
	parser.add_argument('-num_hidden_units_mlp', type=int, default=1024)
	parser.add_argument('-num_hidden_units_lstm', type=int, default=512)
	parser.add_argument('-num_hidden_layers_mlp', type=int, default=3)
	parser.add_argument('-num_hidden_layers_lstm', type=int, default=1)
	parser.add_argument('-dropout', type=float, default=0.5)
	parser.add_argument('-activation_mlp', type=str, default='tanh')
	parser.add_argument('-num_epochs', type=int, default=100)
	parser.add_argument('-model_save_interval', type=int, default=5)
	parser.add_argument('-batch_size', type=int, default=128)
	parser.add_argument('-word_vector', type=str, default='')
	#TODO Feature parser.add_argument('-resume_training', type=str)
	#TODO Feature parser.add_argument('-language_only', type=bool, default= False)
	args = parser.parse_args()

	word_vec_dim= 300
	img_dim = 4096
	max_len = 30
	nb_classes = 1000

	#get the data
	questions_train = open('../data/preprocessed/questions_train2014.txt', 'r').read().decode('utf8').splitlines()
	questions_lengths_train = open('../data/preprocessed/questions_lengths_train2014.txt', 'r').read().decode('utf8').splitlines()
	answers_train = open('../data/preprocessed/answers_train2014_modal.txt', 'r').read().decode('utf8').splitlines()
	images_train = open('../data/preprocessed/images_train2014.txt', 'r').read().decode('utf8').splitlines()
	vgg_model_path = '../features/coco/vgg_feats.mat'

	max_answers = nb_classes
	questions_train, answers_train, images_train = selectFrequentAnswers(questions_train,answers_train,images_train, max_answers)
	questions_lengths_train, questions_train, answers_train, images_train = (list(t) for t in zip(*sorted(zip(map(int, questions_lengths_train), questions_train, answers_train, images_train))))  # cast lengths to int so '9' sorts before '10'

	#encode the remaining answers
	labelencoder = preprocessing.LabelEncoder()
	labelencoder.fit(answers_train)
	nb_classes = len(list(labelencoder.classes_))
	joblib.dump(labelencoder,'../models/labelencoder.pkl')
	
	image_model = Sequential()
	image_model.add(Reshape(input_shape = (img_dim,), dims=(img_dim,)))

	language_model = Sequential()
	if args.num_hidden_layers_lstm == 1:
		language_model.add(LSTM(output_dim = args.num_hidden_units_lstm, return_sequences=False, input_shape=(max_len, word_vec_dim)))
	else:
		language_model.add(LSTM(output_dim = args.num_hidden_units_lstm, return_sequences=True, input_shape=(max_len, word_vec_dim)))
		for i in xrange(args.num_hidden_layers_lstm-2):
			language_model.add(LSTM(output_dim = args.num_hidden_units_lstm, return_sequences=True))
		language_model.add(LSTM(output_dim = args.num_hidden_units_lstm, return_sequences=False))

	model = Sequential()
	model.add(Merge([language_model, image_model], mode='concat', concat_axis=1))
	for i in xrange(args.num_hidden_layers_mlp):
		model.add(Dense(args.num_hidden_units_mlp, init='uniform'))
		model.add(Activation(args.activation_mlp))
		model.add(Dropout(args.dropout))
	model.add(Dense(nb_classes))
	model.add(Activation('softmax'))

	json_string = model.to_json()
	model_file_name = '../models/lstm_1_num_hidden_units_lstm_' + str(args.num_hidden_units_lstm) + \
						'_num_hidden_units_mlp_' + str(args.num_hidden_units_mlp) + '_num_hidden_layers_mlp_' + \
						str(args.num_hidden_layers_mlp) + '_num_hidden_layers_lstm_' + str(args.num_hidden_layers_lstm)
	open(model_file_name + '.json', 'w').write(json_string)

	model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
	print 'Compilation done'

	features_struct = scipy.io.loadmat(vgg_model_path)
	VGGfeatures = features_struct['feats']
	print 'loaded vgg features'
	image_ids = open('../features/coco_vgg_IDMap.txt').read().splitlines()
	img_map = {}
	for ids in image_ids:
		id_split = ids.split()
		img_map[id_split[0]] = int(id_split[1])

	# Choose the word vectors: the default is spaCy's built-in (Goldberg) word2vec vectors, but GloVe is preferred
	if args.word_vector == 'glove':
		nlp = spacy.load('en', vectors='en_glove_cc_300_1m_vectors')
	else:
		nlp = English()

	print 'loaded ' + args.word_vector + ' word2vec features...'
	## training
	print 'Training started...'
	for k in xrange(args.num_epochs):

		progbar = generic_utils.Progbar(len(questions_train))

		for qu_batch,an_batch,im_batch in zip(grouper(questions_train, args.batch_size, fillvalue=questions_train[-1]), 
												grouper(answers_train, args.batch_size, fillvalue=answers_train[-1]), 
												grouper(images_train, args.batch_size, fillvalue=images_train[-1])):
			timesteps = len(nlp(qu_batch[-1])) #questions are sorted by length, so the last one in the batch is the longest
			X_q_batch = get_questions_tensor_timeseries(qu_batch, nlp, timesteps)
			X_i_batch = get_images_matrix(im_batch, img_map, VGGfeatures)
			Y_batch = get_answers_matrix(an_batch, labelencoder)
			loss = model.train_on_batch([X_q_batch, X_i_batch], Y_batch)
			# fix for the Keras v0.3 issue #9
			progbar.add(args.batch_size, values=[("train loss", loss[0])])

		
		if k%args.model_save_interval == 0:
			model.save_weights(model_file_name + '_epoch_{:03d}.hdf5'.format(k))

	model.save_weights(model_file_name + '_epoch_{:03d}.hdf5'.format(k))
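The -resume_training TODO in the argument list above is straightforward to fill in with Keras' own serialization calls, since every script already writes a .json architecture file and periodic .hdf5 checkpoints. A hedged sketch (the checkpoint epoch number is illustrative):

from keras.models import model_from_json

# Rebuild the architecture from the saved JSON, then load a checkpoint
# and recompile before continuing the training loop.
model = model_from_json(open(model_file_name + '.json').read())
model.load_weights(model_file_name + '_epoch_025.hdf5')
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')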
parser.add_argument('-batch_size', type=int, default=128)
args = parser.parse_args()

questions_train = open('./data/questions_train2014.txt',
                       'r').read().decode('utf8').splitlines()
answers_train = open('./data/answers_train2014_modal.txt',
                     'r').read().decode('utf8').splitlines()
images_train = open('./data/images_train2014.txt',
                    'r').read().decode('utf8').splitlines()

logging.debug("Length of questions_train %d", len(questions_train))
logging.debug("Length of answers_train %d", len(answers_train))
logging.debug("Length of images_train %d", len(images_train))

maxAnswers = 1000
questions_train, answers_train, images_train = selectFrequentAnswers(
    questions_train, answers_train, images_train, maxAnswers)

logging.debug("Length of the lists after select Frequent Answers")
logging.debug("Length of questions_train %d", len(questions_train))
logging.debug("Length of answers_train %d", len(answers_train))
logging.debug("Length of images_train %d", len(images_train))

# generates numerical labels for all the answers in answers_train.
labelencoder = preprocessing.LabelEncoder()
labelencoder.fit(answers_train)
nb_classes = len(list(labelencoder.classes_))
joblib.dump(labelencoder, 'labelencoder.pkl')

# TODO Get vectors for each image from Sherlock and load them into an array here
image_ids = open("./id_map.txt").read().splitlines()
id_map = {}
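This snippet is truncated here. In every other script in this listing the id map is filled from the two-column id file, so the missing loop is presumably:

for ids in image_ids:
    id_split = ids.split()
    id_map[id_split[0]] = int(id_split[1])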
Example #10
def main():
	print 'Train LSTM encoder + MLP decoder'
	parser = argparse.ArgumentParser()
	parser.add_argument('-num_hidden_units_mlp', type=int, default=1024)
	parser.add_argument('-num_hidden_units_lstm', type=int, default=512)
	parser.add_argument('-num_hidden_layers_mlp', type=int, default=3)
	parser.add_argument('-num_hidden_layers_lstm', type=int, default=1)
	parser.add_argument('-dropout', type=float, default=0.5)
	parser.add_argument('-activation_mlp', type=str, default='tanh')
	parser.add_argument('-num_epochs', type=int, default=100)
	parser.add_argument('-model_save_interval', type=int, default=5)
	parser.add_argument('-batch_size', type=int, default=4096)
	parser.add_argument('-gap_layer_units', type = int, default = 1024)
	#TODO Feature parser.add_argument('-resume_training', type=str)
	#TODO Feature parser.add_argument('-language_only', type=bool, default= False)
	args = parser.parse_args()

	word_vec_dim= 300
	img_dim = 4096
	max_len = 30
	nb_classes = 1000

	#get the data
	questions_train = open('../data/preprocessed/questions_train2014.txt', 'r').read().decode('utf8').splitlines()
	questions_lengths_train = open('../data/preprocessed/questions_lengths_train2014.txt', 'r').read().decode('utf8').splitlines()
	answers_train = open('../data/preprocessed/answers_train2014_modal.txt', 'r').read().decode('utf8').splitlines()
	images_train = open('../data/preprocessed/images_train2014.txt', 'r').read().decode('utf8').splitlines()
	answers_train_all = open('../data/preprocessed/answers_train2014_all.txt', 'r').read().decode('utf8').splitlines()
	vgg_model_path = '../features/coco/vgg_feats.mat'

	max_answers = nb_classes
	questions_train, answers_train, answers_train_all, images_train = selectFrequentAnswers(questions_train,answers_train, answers_train_all, images_train, max_answers)
	questions_lengths_train, questions_train, answers_train, answers_train_all, images_train = (list(t) for t in zip(*sorted(zip(map(int, questions_lengths_train), questions_train, answers_train, answers_train_all, images_train))))  # cast lengths to int so '9' sorts before '10'

	questions_val = open('../data/preprocessed/questions_val2014.txt', 'r').read().decode('utf8').splitlines()
	questions_lengths_val = open('../data/preprocessed/questions_lengths_val2014.txt', 'r').read().decode('utf8').splitlines()
	answers_val = open('../data/preprocessed/answers_val2014_all.txt', 'r').read().decode('utf8').splitlines()
	images_val = open('../data/preprocessed/images_val2014_all.txt', 'r').read().decode('utf8').splitlines()
	vgg_model_path = '../features/coco/vgg_feats.mat'
	
	questions_lengths_val, questions_val, answers_val, images_val = (list(t) for t in zip(*sorted(zip(map(int, questions_lengths_val), questions_val, answers_val, images_val))))  # cast lengths to int so '9' sorts before '10'

	#encode the remaining answers
	labelencoder = preprocessing.LabelEncoder()
	labelencoder.fit(answers_train)
	nb_classes = len(list(labelencoder.classes_))
	joblib.dump(labelencoder,'../models/labelencoder.pkl')
	
	image_input = Input(shape = (img_dim, ), name = 'image_input')
	# image_gap = Dense(args.gap_layer_units, activation = args.activation_mlp)(image_input)

	language_input = Input(shape = (None, word_vec_dim), name = 'language_input')	
	lstm_out = LSTM(args.num_hidden_units_lstm)(language_input)
	# lstm_gap = Dense(args.gap_layer_units, activation = args.activation_mlp)(lstm_out)
	
	# x = keras.layers.concatenate([lstm_gap, image_gap])
	x = keras.layers.concatenate([lstm_out, image_input])
	x = Dense(1024, activation = args.activation_mlp)(x)
	x = Dropout(args.dropout)(x)
	x = Dense(512, activation = args.activation_mlp)(x)
	x = Dropout(args.dropout)(x)
	x = Dense(256, activation = args.activation_mlp)(x)
	x = Dropout(args.dropout)(x)
	main_output = Dense(nb_classes, activation = 'softmax', name = 'main_output')(x)
	model = Model(inputs = [language_input, image_input], outputs = [main_output])

	# args.model = '../models/lstm_1_num_hidden_units_lstm_512_num_hidden_units_mlp_1024_num_hidden_layers_mlp_3_num_hidden_layers_lstm_1.json'
	# args.weights = '../models/lstm_1_num_hidden_units_lstm_512_num_hidden_units_mlp_1024_num_hidden_layers_mlp_3_num_hidden_layers_lstm_1_epoch_100.hdf5'

	# model = model_from_json(open(args.model).read())
	# model.load_weights(args.weights)

	json_string = model.to_json()
	model_file_name = '../models/lstm_1_num_hidden_units_lstm_' + str(args.num_hidden_units_lstm) + \
						'_num_hidden_units_mlp_' + str(args.num_hidden_units_mlp) + '_num_hidden_layers_mlp_' + \
						str(args.num_hidden_layers_mlp) + '_num_hidden_layers_lstm_' + str(args.num_hidden_layers_lstm)
	results_path = '../results/lstm_decoder_1_num_hidden_units_lstm_' + str(args.num_hidden_units_lstm) + \
						'_num_hidden_units_mlp_' + str(args.num_hidden_units_mlp) + '_num_hidden_layers_mlp_' + \
						str(args.num_hidden_layers_mlp) + '_num_hidden_layers_lstm_' + str(args.num_hidden_layers_lstm)
	open(model_file_name + '.json', 'w').write(json_string)

	model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
	print 'Compilation done'

	features_struct = scipy.io.loadmat(vgg_model_path)
	VGGfeatures = features_struct['feats']
	print 'loaded vgg features'
	image_ids = open('../features/coco_vgg_IDMap.txt').read().splitlines()
	img_map = {}
	for ids in image_ids:
		id_split = ids.split()
		img_map[id_split[0]] = int(id_split[1])

	nlp = English()
	print 'loaded word2vec features...'
	## training
	print 'Training started...'

	Acc_train = [0] * args.num_epochs
	Acc_val = [0] * args.num_epochs
	loss_list = [0] * args.num_epochs

	f1 = open('../results/loss_accuracy_lstm_encoder.txt', 'a')
	f1.write(model_file_name + '\n')
	for k in xrange(args.num_epochs):

		print str(400 + k + 1) + 'th Iteration ...'

		progbar = generic_utils.Progbar(len(questions_train))
		loss_sum = 0
		it = 0
		for qu_batch,an_batch,im_batch in zip(grouper(questions_train, args.batch_size, fillvalue=questions_train[-1]), 
												grouper(answers_train, args.batch_size, fillvalue=answers_train[-1]), 
												grouper(images_train, args.batch_size, fillvalue=images_train[-1])):
			timesteps = len(nlp(qu_batch[-1])) #questions are sorted by length, so the last one in the batch is the longest
			X_q_batch = get_questions_tensor_timeseries(qu_batch, nlp, timesteps)
			X_i_batch = get_images_matrix(im_batch, img_map, VGGfeatures)
			X_i_batch_normalized = preprocessing.normalize(X_i_batch, norm='l2')
			#print X_i_batch.shape, X_q_batch.shape
			Y_batch = get_answers_matrix(an_batch, labelencoder)
			loss = model.train_on_batch([X_q_batch, X_i_batch_normalized], Y_batch)
			progbar.add(args.batch_size, values=[("train loss", loss)])
			it += 1
			loss_sum += loss
		print " " + str(loss_sum / float(it))

		if (k + 1)%args.model_save_interval == 0:
			model.save_weights(model_file_name + '_epoch_{:03d}.hdf5'.format(k + 1))

		loss_list[k] = loss_sum /float(it)
		f1.write(str(loss_list[k]) + ' ')

		# print '    Results on Training set: '
		# Acc_train[k] = Validation_LSTM_encoder(model, questions_train, answers_train_all, images_train, img_map ,VGGfeatures, labelencoder, \
		# 	args.batch_size, nlp, nb_classes, results_path+'_train', model_file_name+'_train')
		print '    Results on Validation set: '
		Acc_val[k] = Validation_LSTM_encoder(model, questions_val, answers_val, images_val, img_map ,VGGfeatures, labelencoder, \
			args.batch_size, nlp, nb_classes, results_path, model_file_name)
		f1.write(str(Acc_val[k]) + '\n')
	
	f1.close()

	plt.figure(1)
	plt.xlabel('Iterations')
	plt.ylabel('Accuracy')
	plt.title('Accuracy on Training and Validation set')
	# plt.plot(range(args.num_epochs), Acc_train, 'b-', label = 'Accuracy on Training set')
	# plt.hold(True)
	plt.plot(range(args.num_epochs), Acc_val, 'r--', label = 'Accuracy on Validation set')
	plt.legend(loc = 'lower right')
	plt.savefig('accuracy_train_val.png')

	plt.figure(2)
	plt.xlabel('Iterations')
	plt.ylabel('Loss')
	plt.title('Convergence curve')
	plt.plot(range(args.num_epochs), loss_list, 'r--')
	plt.savefig('Convergence_curve.png')

	model.save_weights(model_file_name + '_epoch_{:03d}.hdf5'.format(k+1))
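Validation_LSTM_encoder is not shown in this listing. Since answers_val2014_all.txt carries all ten human answers per question, the accuracy it reports is presumably the standard VQA metric, under which a predicted answer scores min(#annotators who gave it / 3, 1); the core of that scoring is:

def vqa_accuracy(predicted, human_answers):
    # An answer counts as fully correct if at least three of the
    # ten annotators gave exactly that answer.
    return min(human_answers.count(predicted) / 3.0, 1.0)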
Example #11
def main():
    print 'Train MLP'
    parser = argparse.ArgumentParser()
    parser.add_argument('-featureType', type=str,
                        default='BoW')  #BoW, WordsGlove, SentGlove
    parser.add_argument('-num_hidden_units', type=int, default=1024)
    parser.add_argument('-num_hidden_layers', type=int, default=3)
    parser.add_argument('-dropout', type=float, default=0.5)
    parser.add_argument('-activation', type=str, default='tanh')
    parser.add_argument('-language_only', type=bool, default=False)
    parser.add_argument('-num_epochs', type=int, default=2000)
    parser.add_argument('-model_save_interval', type=int, default=10)
    parser.add_argument('-batch_size', type=int, default=2048)
    parser.add_argument('-num_top_all_words', type=int, default=1000)
    parser.add_argument('-num_top_start_words', type=int, default=10)
    parser.add_argument('-num_start_words', type=int, default=3)
    args = parser.parse_args()

    questions_train = open('../data/preprocessed/questions_train2014.txt',
                           'r').read().decode('utf8').splitlines()
    answers_train = open('../data/preprocessed/answers_train2014_modal.txt',
                         'r').read().decode('utf8').splitlines()
    answers_train_all = open('../data/preprocessed/answers_train2014_all.txt',
                             'r').read().decode('utf8').splitlines()
    images_train = open('../data/preprocessed/images_train2014.txt',
                        'r').read().decode('utf8').splitlines()
    vgg_model_path = '../features/coco/vgg_feats.mat'

    maxAnswers = 1000
    questions_train, answers_train, answers_train_all, images_train = selectFrequentAnswers(
        questions_train, answers_train, answers_train_all, images_train,
        maxAnswers)

    # print [answers_train.count(answers_train[i]) for i in range(1000)]
    print max([answers_train.count(answers_train[i]) for i in range(1000)])
    print min([answers_train.count(answers_train[i]) for i in range(1000)])
    print np.mean([answers_train.count(answers_train[i]) for i in range(1000)])

    questions_val = open('../data/preprocessed/questions_val2014.txt',
                         'r').read().decode('utf8').splitlines()
    questions_lengths_val = open(
        '../data/preprocessed/questions_lengths_val2014.txt',
        'r').read().decode('utf8').splitlines()
    answers_val = open('../data/preprocessed/answers_val2014_all.txt',
                       'r').read().decode('utf8').splitlines()
    images_val = open('../data/preprocessed/images_val2014_all.txt',
                      'r').read().decode('utf8').splitlines()
    vgg_model_path = '../features/coco/vgg_feats.mat'

    #encode the remaining answers
    labelencoder = preprocessing.LabelEncoder()
    labelencoder.fit(answers_train)
    nb_classes = len(list(labelencoder.classes_))
    joblib.dump(labelencoder, '../models/labelencoder.pkl')

    features_struct = scipy.io.loadmat(vgg_model_path)
    VGGfeatures = features_struct['feats']
    print 'loaded vgg features'
    image_ids = open('../features/coco_vgg_IDMap.txt').read().splitlines()
    img_map = {}
    for ids in image_ids:
        id_split = ids.split()
        img_map[id_split[0]] = int(id_split[1])

    if args.featureType == 'WordsGlove' or args.featureType == 'SentGlove':
        nlp = English()
        print 'loaded word2vec features'
    elif args.featureType == 'BoW':
        num_top_all_words = args.num_top_all_words
        num_top_start_words = args.num_top_start_words
        num_start_words = args.num_start_words
        train_question_file = '../data/preprocessed/questions_train2014.txt'
        vectorizers_list = computeBoWfeatures(num_top_all_words,
                                              num_top_start_words,
                                              num_start_words,
                                              train_question_file)
        # print len(vectorizers_list)
        print 'computed BoW features'

    img_dim = 4096
    if args.featureType == 'WordsGlove' or args.featureType == 'SentGlove':
        word_vec_dim = 300
    elif args.featureType == 'BoW':
        word_vec_dim = num_top_all_words + num_top_start_words * num_start_words

    model = Sequential()
    if args.language_only:
        model.add(Dense(args.num_hidden_units, input_dim=word_vec_dim))
    else:
        model.add(
            Dense(1024,
                  activation=args.activation,
                  input_dim=img_dim + word_vec_dim))
        model.add(Dropout(args.dropout))

    model.add(Dense(1024, activation=args.activation))
    model.add(Dropout(args.dropout))
    model.add(Dense(1024, activation=args.activation))
    model.add(Dropout(args.dropout))

    # for i in xrange(args.num_hidden_layers - 1):
    # 	model.add(Dense(args.num_hidden_units, activation = args.activation))
    # 	if args.dropout > 0:
    # model.add(Dropout(args.dropout))

    model.add(Dense(nb_classes, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    print 'Compilation done...'

    json_string = model.to_json()
    if args.language_only:
        model_file_name = '../models/' + args.featureType + '_mlp_language_only_num_hidden_units_' \
             + str(args.num_hidden_units) + '_num_hidden_layers_' + str(args.num_hidden_layers)
    else:
        model_file_name = '../models/' + args.featureType + '_mlp_num_hidden_units_' \
             + str(args.num_hidden_units) + '_num_hidden_layers_' + str(args.num_hidden_layers)
    if args.language_only:
        results_path = '../results/' + args.featureType + '_mlp_language_only_num_hidden_units_' \
             + str(args.num_hidden_units) + '_num_hidden_layers_' + str(args.num_hidden_layers)
    else:
        results_path = '../results/' + args.featureType + '_mlp_num_hidden_units_' \
             + str(args.num_hidden_units) + '_num_hidden_layers_' + str(args.num_hidden_layers)

    open(model_file_name + '.json', 'w').write(json_string)

    Acc_train = [0] * args.num_epochs
    Acc_val = [0] * args.num_epochs
    loss_list = [0] * args.num_epochs

    index_shuf = range(len(questions_train))
    shuffle(index_shuf)
    questions_train = [questions_train[i] for i in index_shuf]
    answers_train_all = [answers_train_all[i] for i in index_shuf]
    answers_train = [answers_train[i] for i in index_shuf]
    images_train = [images_train[i] for i in index_shuf]

    print 'Training started...'

    f1 = open('../results/loss_accuracy_mlp' + args.featureType + '.txt', 'a')
    f1.write(model_file_name + '\n')

    for k in xrange(args.num_epochs):
        print str(k + 1) + 'th Iteration'
        #shuffle the data points before going through them

        progbar = generic_utils.Progbar(len(questions_train))
        for qu_batch, an_batch, im_batch in zip(
                grouper(questions_train,
                        args.batch_size,
                        fillvalue=questions_train[-1]),
                grouper(answers_train,
                        args.batch_size,
                        fillvalue=answers_train[-1]),
                grouper(images_train,
                        args.batch_size,
                        fillvalue=images_train[-1])):
            if args.featureType == 'WordsGlove':
                X_q_batch = get_questions_matrix_sum(qu_batch, nlp)
            elif args.featureType == 'SentGlove':
                X_q_batch = get_questions_matrix_sentGlove(qu_batch, nlp)
            elif args.featureType == 'BoW':
                X_q_batch = get_questions_BoW(qu_batch, vectorizers_list)
            # print np.shape(X_q_batch)
            if args.language_only:
                X_batch = X_q_batch
            else:
                X_i_batch = get_images_matrix(im_batch, img_map, VGGfeatures)
                X_i_batch_normalized = preprocessing.normalize(X_i_batch,
                                                               norm='l2')
                X_batch = np.hstack((X_q_batch, X_i_batch_normalized))

            Y_batch = get_answers_matrix(an_batch, labelencoder)
            loss = model.train_on_batch(X_batch, Y_batch)
            progbar.add(args.batch_size, values=[("train loss", loss)])

        if (k + 1) % args.model_save_interval == 0:
            model.save_weights(model_file_name +
                               '_epoch_{:02d}.hdf5'.format(k + 1))

        loss_list[k] = loss
        f1.write(str(loss_list[k]) + ' ')

        # print '	Results on Training set: '

        # Acc_train[k] = ValidationMLP(k, model, questions_train, answers_train_all, images_train, img_map, VGGfeatures, labelencoder, \
        # 	 args.batch_size, nb_classes, results_path+'_train', model_file_name+'_train', args.featureType, args.language_only)

        print '	Results on Validation set: '

        Acc_val[k] = ValidationMLP(k, model, questions_val, answers_val, images_val, img_map, VGGfeatures, labelencoder, \
          args.batch_size, nb_classes, results_path, model_file_name, args.featureType, args.language_only)
        f1.write(str(Acc_val[k]) + '\n')

    f1.close()
    plt.figure(1)
    plt.xlabel('Iterations')
    plt.ylabel('Accuracy')
    plt.title('Accuracy on Training and Validation set')
    # plt.plot(range(args.num_epochs), Acc_train, 'b-', label = 'Accuracy on Training set')
    # plt.hold(True)
    plt.plot(range(args.num_epochs),
             Acc_val,
             'r--',
             label='Accuracy on Validation set')
    plt.legend(loc='lower right')
    plt.savefig('../pic/accuracy_train_val' + args.featureType + '.png')

    plt.figure(2)
    plt.xlabel('Iterations')
    plt.ylabel('Loss')
    plt.title('Convergence curve')
    plt.plot(range(args.num_epochs), loss_list, 'r--')
    plt.savefig('../pic/Convergence_curve' + args.featureType + '.png')

    model.save_weights(model_file_name + '_epoch_{:02d}.hdf5'.format(k + 1))
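computeBoWfeatures and get_questions_BoW are also not shown, but the feature width num_top_all_words + num_top_start_words * num_start_words suggests one bag-of-words over the full question plus a small vocabulary for each of the first num_start_words word positions (question openers like "what is the..." are highly predictive of the answer type). A sketch of that reading with scikit-learn; the real helpers may differ:

import numpy as np
from sklearn.feature_extraction.text import CountVectorizer

def computeBoWfeatures(num_top_all_words, num_top_start_words, num_start_words,
                       train_question_file):
    questions = open(train_question_file).read().decode('utf8').splitlines()
    # One vectorizer over whole questions, capped at the most frequent words...
    vectorizers = [CountVectorizer(max_features=num_top_all_words).fit(questions)]
    # ...plus one tiny vectorizer per leading word position.
    for pos in range(num_start_words):
        words = [q.split()[pos] for q in questions if len(q.split()) > pos]
        vectorizers.append(
            CountVectorizer(max_features=num_top_start_words).fit(words))
    return vectorizers

def get_questions_BoW(questions, vectorizers):
    parts = [vectorizers[0].transform(questions).toarray()]
    for pos, vec in enumerate(vectorizers[1:]):
        words = [q.split()[pos] if len(q.split()) > pos else '' for q in questions]
        parts.append(vec.transform(words).toarray())
    return np.hstack(parts)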