def save_pickle():
    answers = open('../data/ans_txt.txt', 'r').read().decode('utf8').splitlines()
    print(len(answers))
    questions = open("../data/ques_txt.txt", 'r').read().decode('utf8').splitlines()
    images = open("../data/img_id_txt.txt", 'r').read().decode('utf8').splitlines()
    maxAnswers = 1000
    questions, answers, images = selectFrequentAnswers(questions, answers, images, maxAnswers)
    encoder = preprocessing.LabelEncoder()
    encoder.fit(answers)
    print("Number of classes: " + str(len(list(encoder.classes_))))
    joblib.dump(encoder, '../data/encoder.pkl')
    return "DONE"
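# selectFrequentAnswers is called throughout these scripts but is not defined in this file.
# A minimal sketch of the assumed behaviour: keep only the (question, answer, image) triples
# whose answer is among the max_answers most frequent answers. (Some scripts below pass an
# extra answers_train_all list, which would be filtered the same way.)
from collections import Counter

def selectFrequentAnswers(questions, answers, images, max_answers):
    top_answers = set(ans for ans, _ in Counter(answers).most_common(max_answers))
    kept = [(q, a, i) for q, a, i in zip(questions, answers, images) if a in top_answers]
    new_questions, new_answers, new_images = map(list, zip(*kept))
    return new_questions, new_answers, new_images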
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-num_hidden_units', type=int, default=512)
    parser.add_argument('-num_lstm_layers', type=int, default=2)
    parser.add_argument('-dropout', type=float, default=0.2)
    parser.add_argument('-activation', type=str, default='tanh')
    parser.add_argument('-num_epochs', type=int, default=100)
    parser.add_argument('-model_save_interval', type=int, default=5)
    parser.add_argument('-batch_size', type=int, default=128)
    parser.add_argument('-word_vector', type=str, default='')
    args = parser.parse_args()

    questions_train = open('../data/preprocessed/questions_train2014.txt', 'r').read().decode('utf8').splitlines()
    questions_lengths_train = open('../data/preprocessed/questions_lengths_train2014.txt', 'r').read().decode('utf8').splitlines()
    answers_train = open('../data/preprocessed/answers_train2014.txt', 'r').read().decode('utf8').splitlines()
    images_train = open('../data/preprocessed/images_train2014.txt', 'r').read().decode('utf8').splitlines()
    max_answers = 1000
    questions_train, answers_train, images_train = selectFrequentAnswers(questions_train, answers_train, images_train, max_answers)
    print 'Loaded questions, sorting by length...'
    questions_lengths_train, questions_train, answers_train = (list(t) for t in zip(*sorted(zip(questions_lengths_train, questions_train, answers_train))))

    # encode the remaining answers
    labelencoder = preprocessing.LabelEncoder()
    labelencoder.fit(answers_train)
    nb_classes = len(list(labelencoder.classes_))
    joblib.dump(labelencoder, '../models/labelencoder.pkl')

    max_len = 30  # 25 is max for training, 27 is max for validation
    word_vec_dim = 300

    model = Sequential()
    model.add(LSTM(output_dim=args.num_hidden_units, activation='tanh', return_sequences=True, input_shape=(max_len, word_vec_dim)))
    model.add(Dropout(args.dropout))
    model.add(LSTM(args.num_hidden_units, return_sequences=False))
    model.add(Dense(nb_classes, init='uniform'))
    model.add(Activation('softmax'))

    json_string = model.to_json()
    model_file_name = '../models/lstm_language_only_num_hidden_units_' + str(args.num_hidden_units) + \
        '_num_lstm_layers_' + str(args.num_lstm_layers) + '_dropout_' + str(args.dropout)
    open(model_file_name + '.json', 'w').write(json_string)

    print 'Compiling model...'
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    print 'Compilation done...'

    # set up word vectors; the default is Goldberg's word2vec vectors, but GloVe is preferred
    if args.word_vector == 'glove':
        nlp = spacy.load('en', vectors='en_glove_cc_300_1m_vectors')
    else:
        nlp = English()
    print 'loaded ' + args.word_vector + ' word2vec features...'

    ## training
    # num_epochs, batch_size and model_save_interval were moved to the argument parser
    print 'Training started...'
    for k in xrange(args.num_epochs):
        progbar = generic_utils.Progbar(len(questions_train))
        for qu_batch, an_batch, im_batch in zip(grouper(questions_train, args.batch_size, fillvalue=questions_train[0]),
                                                grouper(answers_train, args.batch_size, fillvalue=answers_train[0]),
                                                grouper(images_train, args.batch_size, fillvalue=images_train[0])):
            timesteps = len(nlp(qu_batch[-1]))  # questions sorted in descending order of length
            X_q_batch = get_questions_tensor_timeseries(qu_batch, nlp, timesteps)
            Y_batch = get_answers_matrix(an_batch, labelencoder)
            loss = model.train_on_batch(X_q_batch, Y_batch)
            # fix for the Keras v0.3 issue #9
            progbar.add(args.batch_size, values=[("train loss", loss[0])])
        if k % args.model_save_interval == 0:
            model.save_weights(model_file_name + '_epoch_{:02d}.hdf5'.format(k))

    model.save_weights(model_file_name + '_epoch_{:02d}.hdf5'.format(k + 1))
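# grouper is used by every training loop above and below to cut the training lists into
# fixed-size batches, padding the last batch with a fill value. It is not defined in this
# file; a minimal sketch (the standard Python 2 itertools recipe):
from itertools import izip_longest

def grouper(iterable, n, fillvalue=None):
    # grouper('ABCDEFG', 3, 'x') --> ('A','B','C') ('D','E','F') ('G','x','x')
    args = [iter(iterable)] * n
    return izip_longest(*args, fillvalue=fillvalue)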
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-num_hidden_units_mlp', type=int, default=1024)
    parser.add_argument('-num_hidden_units_lstm', type=int, default=512)
    parser.add_argument('-num_hidden_layers_mlp', type=int, default=3)
    parser.add_argument('-num_hidden_layers_lstm', type=int, default=1)
    parser.add_argument('-dropout', type=float, default=0.5)
    parser.add_argument('-activation_mlp', type=str, default='tanh')
    parser.add_argument('-num_epochs', type=int, default=100)
    parser.add_argument('-model_save_interval', type=int, default=5)
    parser.add_argument('-batch_size', type=int, default=128)
    #TODO Feature parser.add_argument('-resume_training', type=str)
    #TODO Feature parser.add_argument('-language_only', type=bool, default=False)
    args = parser.parse_args()

    word_vec_dim = 300
    img_dim = 4096
    max_len = 30
    nb_classes = 1000

    # get the data
    questions_train = open('../data/preprocessed/questions_train2014.txt', 'r').read().decode('utf8').splitlines()
    questions_lengths_train = open('../data/preprocessed/questions_lengths_train2014.txt', 'r').read().decode('utf8').splitlines()
    answers_train = open('../data/preprocessed/answers_train2014.txt', 'r').read().decode('utf8').splitlines()
    images_train = open('../data/preprocessed/images_train2014.txt', 'r').read().decode('utf8').splitlines()
    vgg_model_path = '../features/coco/vgg_feats.mat'
    max_answers = nb_classes
    questions_train, answers_train, images_train = selectFrequentAnswers(questions_train, answers_train, images_train, max_answers)
    questions_lengths_train, questions_train, answers_train, images_train = (list(t) for t in zip(*sorted(zip(questions_lengths_train, questions_train, answers_train, images_train))))

    # encode the remaining answers
    labelencoder = preprocessing.LabelEncoder()
    labelencoder.fit(answers_train)
    nb_classes = len(list(labelencoder.classes_))
    joblib.dump(labelencoder, '../models/labelencoder.pkl')

    image_model = Sequential()
    image_model.add(Reshape(input_shape=(img_dim,), dims=(img_dim,)))

    language_model = Sequential()
    if args.num_hidden_layers_lstm == 1:
        language_model.add(LSTM(output_dim=args.num_hidden_units_lstm, return_sequences=False, input_shape=(max_len, word_vec_dim)))
    else:
        language_model.add(LSTM(output_dim=args.num_hidden_units_lstm, return_sequences=True, input_shape=(max_len, word_vec_dim)))
        for i in xrange(args.num_hidden_layers_lstm - 2):
            language_model.add(LSTM(output_dim=args.num_hidden_units_lstm, return_sequences=True))
        language_model.add(LSTM(output_dim=args.num_hidden_units_lstm, return_sequences=False))

    model = Sequential()
    model.add(Merge([language_model, image_model], mode='concat', concat_axis=1))
    for i in xrange(args.num_hidden_layers_mlp):
        model.add(Dense(args.num_hidden_units_mlp, init='uniform'))
        model.add(Activation(args.activation_mlp))
        model.add(Dropout(args.dropout))
    model.add(Dense(nb_classes))
    model.add(Activation('softmax'))

    json_string = model.to_json()
    model_file_name = '../models/lstm_1_num_hidden_units_lstm_' + str(args.num_hidden_units_lstm) + \
        '_num_hidden_units_mlp_' + str(args.num_hidden_units_mlp) + '_num_hidden_layers_mlp_' + \
        str(args.num_hidden_layers_mlp) + '_num_hidden_layers_lstm_' + str(args.num_hidden_layers_lstm)
    open(model_file_name + '.json', 'w').write(json_string)

    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    print 'Compilation done'

    features_struct = scipy.io.loadmat(vgg_model_path)
    VGGfeatures = features_struct['feats']
    print 'loaded vgg features'
    image_ids = open('../features/coco_vgg_IDMap.txt').read().splitlines()
    img_map = {}
    for ids in image_ids:
        id_split = ids.split()
        img_map[id_split[0]] = int(id_split[1])

    nlp = English()
    print 'loaded word2vec features...'

    ## training
    print 'Training started...'
    for k in xrange(args.num_epochs):
        progbar = generic_utils.Progbar(len(questions_train))
        for qu_batch, an_batch, im_batch in zip(grouper(questions_train, args.batch_size, fillvalue=questions_train[-1]),
                                                grouper(answers_train, args.batch_size, fillvalue=answers_train[-1]),
                                                grouper(images_train, args.batch_size, fillvalue=images_train[-1])):
            timesteps = len(nlp(qu_batch[-1]))  # questions sorted in descending order of length
            X_q_batch = get_questions_tensor_timeseries(qu_batch, nlp, timesteps)
            X_i_batch = get_images_matrix(im_batch, img_map, VGGfeatures)
            Y_batch = get_answers_matrix(an_batch, labelencoder)
            loss = model.train_on_batch([X_q_batch, X_i_batch], Y_batch)
            progbar.add(args.batch_size, values=[("train loss", loss)])
        if k % args.model_save_interval == 0:
            model.save_weights(model_file_name + '_epoch_{:03d}.hdf5'.format(k))

    model.save_weights(model_file_name + '_epoch_{:03d}.hdf5'.format(k))
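# get_images_matrix is not defined in this file. A minimal sketch of the assumed behaviour,
# given that VGGfeatures is the (4096, num_images) matrix loaded from vgg_feats.mat and
# img_map maps a COCO image id (string) to its column index:
import numpy as np

def get_images_matrix(img_coco_ids, img_map, VGGfeatures):
    nb_samples = len(img_coco_ids)
    nb_dimensions = VGGfeatures.shape[0]
    image_matrix = np.zeros((nb_samples, nb_dimensions))
    for j, coco_id in enumerate(img_coco_ids):
        # pull the precomputed 4096-d VGG feature column for this image
        image_matrix[j, :] = VGGfeatures[:, img_map[coco_id]]
    return image_matrix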
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-num_hidden_units', type=int, default=512)
    parser.add_argument('-num_lstm_layers', type=int, default=2)
    parser.add_argument('-dropout', type=float, default=0.2)
    parser.add_argument('-activation', type=str, default='tanh')
    args = parser.parse_args()

    questions_train = open('../data/preprocessed/questions_train2014.txt', 'r').read().decode('utf8').splitlines()
    questions_lengths_train = open('../data/preprocessed/questions_lengths_train2014.txt', 'r').read().decode('utf8').splitlines()
    answers_train = open('../data/preprocessed/answers_train2014.txt', 'r').read().decode('utf8').splitlines()
    images_train = open('../data/preprocessed/images_train2014.txt', 'r').read().decode('utf8').splitlines()
    max_answers = 1000
    questions_train, answers_train, images_train = selectFrequentAnswers(questions_train, answers_train, images_train, max_answers)
    print 'Loaded questions, sorting by length...'
    questions_lengths_train, questions_train, answers_train = (list(t) for t in zip(*sorted(zip(questions_lengths_train, questions_train, answers_train))))

    # encode the remaining answers
    labelencoder = preprocessing.LabelEncoder()
    labelencoder.fit(answers_train)
    nb_classes = len(list(labelencoder.classes_))
    joblib.dump(labelencoder, '../models/labelencoder.pkl')

    max_len = 30  # 25 is max for training, 27 is max for validation
    word_vec_dim = 300

    model = Sequential()
    model.add(LSTM(output_dim=args.num_hidden_units, activation='tanh', return_sequences=True, input_shape=(max_len, word_vec_dim)))
    model.add(Dropout(args.dropout))
    model.add(LSTM(args.num_hidden_units, return_sequences=False))
    model.add(Dense(nb_classes, init='uniform'))
    model.add(Activation('softmax'))

    json_string = model.to_json()
    model_file_name = '../models/lstm_language_only_num_hidden_units_' + str(args.num_hidden_units) + \
        '_num_lstm_layers_' + str(args.num_lstm_layers) + '_dropout_' + str(args.dropout)
    open(model_file_name + '.json', 'w').write(json_string)

    print 'Compiling model...'
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    print 'Compilation done...'

    # set up word vectors
    nlp = English()
    print 'loaded word2vec features...'

    ## training
    print 'Training started...'
    numEpochs = 100
    model_save_interval = 5
    batchSize = 128
    for k in xrange(numEpochs):
        progbar = generic_utils.Progbar(len(questions_train))
        for qu_batch, an_batch, im_batch in zip(grouper(questions_train, batchSize, fillvalue=questions_train[0]),
                                                grouper(answers_train, batchSize, fillvalue=answers_train[0]),
                                                grouper(images_train, batchSize, fillvalue=images_train[0])):
            timesteps = len(nlp(qu_batch[-1]))  # questions sorted in descending order of length
            X_q_batch = get_questions_tensor_timeseries(qu_batch, nlp, timesteps)
            Y_batch = get_answers_matrix(an_batch, labelencoder)
            loss = model.train_on_batch(X_q_batch, Y_batch)
            progbar.add(batchSize, values=[("train loss", loss)])
        if k % model_save_interval == 0:
            model.save_weights(model_file_name + '_epoch_{:02d}.hdf5'.format(k))

    model.save_weights(model_file_name + '_epoch_{:02d}.hdf5'.format(k + 1))
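# get_questions_tensor_timeseries is not defined here. A minimal sketch of the assumed
# behaviour: stack the per-token spaCy word vectors of each question into a
# (batch, timesteps, word_vec_dim) tensor, zero-padded/truncated to `timesteps`
# (assuming the spaCy tokens expose a .vector attribute, as in spaCy 1.x).
import numpy as np

def get_questions_tensor_timeseries(questions, nlp, timesteps):
    nb_samples = len(questions)
    word_vec_dim = nlp(questions[0])[0].vector.shape[0]
    questions_tensor = np.zeros((nb_samples, timesteps, word_vec_dim))
    for i, question in enumerate(questions):
        tokens = nlp(question)
        for j, token in enumerate(tokens):
            if j < timesteps:
                questions_tensor[i, j, :] = token.vector
    return questions_tensor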
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-num_hidden_units', type=int, default=1024)
    parser.add_argument('-num_hidden_layers', type=int, default=3)
    parser.add_argument('-dropout', type=float, default=0.5)
    parser.add_argument('-activation', type=str, default='tanh')
    parser.add_argument('-language_only', type=bool, default=False)
    parser.add_argument('-num_epochs', type=int, default=10)
    parser.add_argument('-model_save_interval', type=int, default=10)
    parser.add_argument('-batch_size', type=int, default=128)
    args = parser.parse_args()

    questions_train = open('../data/preprocessed/questions_train2014.txt', 'r').read().decode('utf8').splitlines()
    answers_train = open('../data/preprocessed/answers_train2014_modal.txt', 'r').read().decode('utf8').splitlines()
    images_train = open('../data/preprocessed/images_train2014.txt', 'r').read().decode('utf8').splitlines()
    vgg_model_path = '../features/coco/vgg_feats.mat'
    maxAnswers = 1000
    questions_train, answers_train, images_train = selectFrequentAnswers(questions_train, answers_train, images_train, maxAnswers)

    # encode the remaining answers
    labelencoder = preprocessing.LabelEncoder()
    labelencoder.fit(answers_train)
    nb_classes = len(list(labelencoder.classes_))
    joblib.dump(labelencoder, '../models/labelencoder.pkl')

    features_struct = scipy.io.loadmat(vgg_model_path)
    VGGfeatures = features_struct['feats']
    print 'loaded vgg features'
    image_ids = open('../features/coco_vgg_IDMap.txt').read().splitlines()
    id_map = {}
    for ids in image_ids:
        id_split = ids.split()
        id_map[id_split[0]] = int(id_split[1])

    nlp = English()
    print 'loaded word2vec features...'

    img_dim = 4096
    word_vec_dim = 300

    model = Sequential()
    if args.language_only:
        model.add(Dense(args.num_hidden_units, input_dim=word_vec_dim, init='uniform'))
    else:
        model.add(Dense(args.num_hidden_units, input_dim=img_dim + word_vec_dim, init='uniform'))
    model.add(Activation(args.activation))
    if args.dropout > 0:
        model.add(Dropout(args.dropout))
    for i in xrange(args.num_hidden_layers - 1):
        model.add(Dense(args.num_hidden_units, init='uniform'))
        model.add(Activation(args.activation))
        if args.dropout > 0:
            model.add(Dropout(args.dropout))
    model.add(Dense(nb_classes, init='uniform'))
    model.add(Activation('softmax'))

    json_string = model.to_json()
    if args.language_only:
        model_file_name = '../models/mlp_language_only_num_hidden_units_' + str(args.num_hidden_units) + '_num_hidden_layers_' + str(args.num_hidden_layers)
    else:
        model_file_name = '../models/mlp_num_hidden_units_' + str(args.num_hidden_units) + '_num_hidden_layers_' + str(args.num_hidden_layers)
    open(model_file_name + '.json', 'w').write(json_string)

    print 'Compiling model...'
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    print 'Compilation done...'

    print 'Training started...'
    for k in xrange(args.num_epochs):
        # shuffle the data points before going through them
        index_shuf = range(len(questions_train))
        shuffle(index_shuf)
        questions_train = [questions_train[i] for i in index_shuf]
        answers_train = [answers_train[i] for i in index_shuf]
        images_train = [images_train[i] for i in index_shuf]

        progbar = generic_utils.Progbar(len(questions_train))
        for qu_batch, an_batch, im_batch in zip(grouper(questions_train, args.batch_size, fillvalue=questions_train[-1]),
                                                grouper(answers_train, args.batch_size, fillvalue=answers_train[-1]),
                                                grouper(images_train, args.batch_size, fillvalue=images_train[-1])):
            X_q_batch = get_questions_matrix_sum(qu_batch, nlp)
            if args.language_only:
                X_batch = X_q_batch
            else:
                X_i_batch = get_images_matrix(im_batch, id_map, VGGfeatures)
                X_batch = np.hstack((X_q_batch, X_i_batch))
            Y_batch = get_answers_matrix(an_batch, labelencoder)
            loss = model.train_on_batch(X_batch, Y_batch)
            progbar.add(args.batch_size, values=[("train loss", loss)])
            # print type(loss)
        if k % args.model_save_interval == 0:
            model.save_weights(model_file_name + '_epoch_{:02d}.hdf5'.format(k))

    model.save_weights(model_file_name + '_epoch_{:02d}.hdf5'.format(k))
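# get_questions_matrix_sum is not defined in this file. A minimal sketch of the assumed
# behaviour: represent each question as the sum of its spaCy word vectors, giving a
# (batch, word_vec_dim) matrix (again assuming tokens expose a .vector attribute).
import numpy as np

def get_questions_matrix_sum(questions, nlp):
    nb_samples = len(questions)
    word_vec_dim = nlp(questions[0])[0].vector.shape[0]
    questions_matrix = np.zeros((nb_samples, word_vec_dim))
    for i, question in enumerate(questions):
        for token in nlp(question):
            questions_matrix[i, :] += token.vector
    return questions_matrix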
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-num_hidden_units', type=int, default=1024)
    parser.add_argument('-num_hidden_layers', type=int, default=3)
    parser.add_argument('-dropout', type=float, default=0.5)
    parser.add_argument('-activation', type=str, default='tanh')
    parser.add_argument('-language_only', type=bool, default=False)
    parser.add_argument('-num_epochs', type=int, default=100)
    parser.add_argument('-model_save_interval', type=int, default=10)
    parser.add_argument('-batch_size', type=int, default=128)
    parser.add_argument('-word_vector', type=str, default='')
    args = parser.parse_args()

    questions_train = open('../data/preprocessed/questions_train2014.txt', 'r').read().decode('utf8').splitlines()
    answers_train = open('../data/preprocessed/answers_train2014_modal.txt', 'r').read().decode('utf8').splitlines()
    images_train = open('../data/preprocessed/images_train2014.txt', 'r').read().decode('utf8').splitlines()
    vgg_model_path = '../features/coco/vgg_feats.mat'
    maxAnswers = 1000
    questions_train, answers_train, images_train = selectFrequentAnswers(questions_train, answers_train, images_train, maxAnswers)

    # encode the remaining answers
    labelencoder = preprocessing.LabelEncoder()
    labelencoder.fit(answers_train)
    nb_classes = len(list(labelencoder.classes_))
    joblib.dump(labelencoder, '../models/labelencoder.pkl')

    features_struct = scipy.io.loadmat(vgg_model_path)
    VGGfeatures = features_struct['feats']
    print 'loaded vgg features'
    image_ids = open('../features/coco_vgg_IDMap.txt').read().splitlines()
    id_map = {}
    for ids in image_ids:
        id_split = ids.split()
        id_map[id_split[0]] = int(id_split[1])

    # choose the word vectors: the default is Goldberg's word2vec vectors, but GloVe is preferred
    if args.word_vector == 'glove':
        nlp = spacy.load('en', vectors='en_glove_cc_300_1m_vectors')
    else:
        nlp = English()
    print 'loaded ' + args.word_vector + ' word2vec features...'

    img_dim = 4096
    word_vec_dim = 300

    model = Sequential()
    if args.language_only:
        model.add(Dense(args.num_hidden_units, input_dim=word_vec_dim, init='uniform'))
    else:
        model.add(Dense(args.num_hidden_units, input_dim=img_dim + word_vec_dim, init='uniform'))
    model.add(Activation(args.activation))
    if args.dropout > 0:
        model.add(Dropout(args.dropout))
    for i in xrange(args.num_hidden_layers - 1):
        model.add(Dense(args.num_hidden_units, init='uniform'))
        model.add(Activation(args.activation))
        if args.dropout > 0:
            model.add(Dropout(args.dropout))
    model.add(Dense(nb_classes, init='uniform'))
    model.add(Activation('softmax'))

    json_string = model.to_json()
    if args.language_only:
        model_file_name = '../models/mlp_language_only_num_hidden_units_' + str(args.num_hidden_units) + '_num_hidden_layers_' + str(args.num_hidden_layers)
    else:
        model_file_name = '../models/mlp_num_hidden_units_' + str(args.num_hidden_units) + '_num_hidden_layers_' + str(args.num_hidden_layers)
    open(model_file_name + '.json', 'w').write(json_string)

    print 'Compiling model...'
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    print 'Compilation done...'

    print 'Training started...'
    for k in xrange(args.num_epochs):
        # shuffle the data points before going through them
        index_shuf = range(len(questions_train))
        shuffle(index_shuf)
        questions_train = [questions_train[i] for i in index_shuf]
        answers_train = [answers_train[i] for i in index_shuf]
        images_train = [images_train[i] for i in index_shuf]

        progbar = generic_utils.Progbar(len(questions_train))
        for qu_batch, an_batch, im_batch in zip(grouper(questions_train, args.batch_size, fillvalue=questions_train[-1]),
                                                grouper(answers_train, args.batch_size, fillvalue=answers_train[-1]),
                                                grouper(images_train, args.batch_size, fillvalue=images_train[-1])):
            X_q_batch = get_questions_matrix_sum(qu_batch, nlp)
            if args.language_only:
                X_batch = X_q_batch
            else:
                X_i_batch = get_images_matrix(im_batch, id_map, VGGfeatures)
                X_batch = np.hstack((X_q_batch, X_i_batch))
            Y_batch = get_answers_matrix(an_batch, labelencoder)
            loss = model.train_on_batch(X_batch, Y_batch)
            # fix for the Keras v0.3 issue #9
            progbar.add(args.batch_size, values=[("train loss", loss[0])])
            # print type(loss)
        if k % args.model_save_interval == 0:
            model.save_weights(model_file_name + '_epoch_{:02d}.hdf5'.format(k))

    model.save_weights(model_file_name + '_epoch_{:02d}.hdf5'.format(k))
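# get_answers_matrix is not defined in this file. A minimal sketch of the assumed
# behaviour: encode the answer strings with the fitted LabelEncoder and one-hot them
# for the softmax / categorical_crossentropy output (np_utils is Keras' utility module
# in the 0.x/1.x API used here).
from keras.utils import np_utils

def get_answers_matrix(answers, encoder):
    y = encoder.transform(answers)           # string answers -> integer class labels
    nb_classes = encoder.classes_.shape[0]
    return np_utils.to_categorical(y, nb_classes)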
def main():
    cwd = os.getcwd()
    parser = argparse.ArgumentParser()
    parser.add_argument('-num_hidden_units', type=int, default=1024)
    parser.add_argument('-num_hidden_layers', type=int, default=3)
    parser.add_argument('-dropout', type=float, default=0.5)
    parser.add_argument('-activation', type=str, default='tanh')
    parser.add_argument('-language_only', type=bool, default=False)
    parser.add_argument('-num_epochs', type=int, default=2)
    parser.add_argument('-model_save_interval', type=int, default=10)
    parser.add_argument('-model_weights_path', type=str, default=cwd + '/vgg/vgg16_weights.h5')
    parser.add_argument('-batch_size', type=int, default=128)
    parser.add_argument('-questions_train', type=str, default=cwd + '/data/preprocessed/questions_train2015.txt')
    parser.add_argument('-answers_train', type=str, default=cwd + '/data/preprocessed/answers_train2015_modal.txt')
    parser.add_argument('-im_dir', type=str, default=cwd + '/data/preprocessed/scene_img_abstract_v002_train2015/')
    #parser.add_argument('-questions_train', type=str, default=cwd+'/data/preprocessed/questions_train2014.txt')
    args = parser.parse_args()

    questions_train = open(args.questions_train, 'r').read().decode('utf8').splitlines()
    answers_train = open(args.answers_train, 'r').read().decode('utf8').splitlines()
    images_train = open(cwd + '/data/preprocessed/images_train2015.txt', 'r').read().decode('utf8').splitlines()
    #vgg_model_path = cwd+'/features/coco/vgg_feats.mat' #this needs to change
    maxAnswers = 100
    questions_train, answers_train, images_train = selectFrequentAnswers(questions_train, answers_train, images_train, maxAnswers)

    # encode the remaining answers
    labelencoder = preprocessing.LabelEncoder()
    labelencoder.fit(answers_train)
    nb_classes = len(list(labelencoder.classes_))
    joblib.dump(labelencoder, cwd + '/models/labelencoder.pkl')

    #features_struct = scipy.io.loadmat(vgg_model_path)
    #VGGfeatures = features_struct['feats']
    # print 'loaded vgg features'
    # image_ids = open(cwd+'/features/coco_vgg_IDMap.txt').read().splitlines()
    # id_map = {}
    # for ids in image_ids:
    #     id_split = ids.split()
    #     id_map[id_split[0]] = int(id_split[1])

    vgg_model = vgg16.VGG_16(args.model_weights_path)
    sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
    vgg_model.compile(optimizer=sgd, loss='categorical_crossentropy')
    print 'loaded vgg model...'

    nlp = English()
    print 'loaded word2vec features...'

    img_dim = 4096
    word_vec_dim = 300

    model = Sequential()
    if args.language_only:
        model.add(Dense(args.num_hidden_units, input_dim=word_vec_dim, init='uniform'))
    else:
        model.add(Dense(args.num_hidden_units, input_dim=img_dim + word_vec_dim, init='uniform'))
    model.add(Activation(args.activation))
    if args.dropout > 0:
        model.add(Dropout(args.dropout))
    for i in xrange(args.num_hidden_layers - 1):
        model.add(Dense(args.num_hidden_units, init='uniform'))
        model.add(Activation(args.activation))
        if args.dropout > 0:
            model.add(Dropout(args.dropout))
    model.add(Dense(nb_classes, init='uniform'))
    model.add(Activation('softmax'))

    json_string = model.to_json()
    model_file_name = cwd + '/models/mlp_num_hidden_units_' + str(args.num_hidden_units) + '_num_hidden_layers_' + str(args.num_hidden_layers)
    open(model_file_name + '.json', 'w').write(json_string)

    # the classifier must be compiled before train_on_batch can be called
    # (the other scripts in this repo use the same loss/optimizer pair)
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

    print 'Training started...'
    id_map = {}
    f1 = open('abstract_image_precompute')
    f2 = open('abstract_image_precompute_reverse')
    VGGfeatures = np.loadtxt(f1)
    VGGfeatures_reverse = np.loadtxt(f2)
    f1.close()
    f2.close()

    for k in xrange(args.num_epochs):
        # shuffle the data points before going through them
        index_shuf = range(len(questions_train))
        shuffle(index_shuf)
        questions_train = [questions_train[i] for i in index_shuf]
        answers_train = [answers_train[i] for i in index_shuf]
        images_train = [images_train[i] for i in index_shuf]

        progbar = generic_utils.Progbar(len(questions_train))
        for qu_batch, an_batch, im_batch in zip(grouper(questions_train, args.batch_size, fillvalue=questions_train[-1]),
                                                grouper(answers_train, args.batch_size, fillvalue=answers_train[-1]),
                                                grouper(images_train, args.batch_size, fillvalue=images_train[-1])):
            X_q_batch = get_questions_matrix_sum(qu_batch, nlp)
            im_path = args.im_dir + "abstract_v002_train2015_"
            print 'getting image features...'
            X_i_batch = get_images_matrix(im_batch, VGGfeatures, VGGfeatures_reverse)
            # X_i_batch = get_images_matrix_from_model(vgg_model, im_batch, im_path, id_map)
            X_batch = np.hstack((X_q_batch, X_i_batch))
            Y_batch = get_answers_matrix(an_batch, labelencoder)
            print 'running training on batch...'
            loss = model.train_on_batch(X_batch, Y_batch)
            progbar.add(args.batch_size, values=[("train loss", loss)])
        if k % args.model_save_interval == 0:
            model.save_weights(model_file_name + '_epoch_{:02d}.hdf5'.format(k))

    model.save_weights(model_file_name + '_epoch_{:02d}.hdf5'.format(k))
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-num_hidden_units_mlp', type=int, default=1024)
    parser.add_argument('-num_hidden_units_lstm', type=int, default=512)
    parser.add_argument('-num_hidden_layers_mlp', type=int, default=3)
    parser.add_argument('-num_hidden_layers_lstm', type=int, default=1)
    parser.add_argument('-dropout', type=float, default=0.5)
    parser.add_argument('-activation_mlp', type=str, default='tanh')
    parser.add_argument('-num_epochs', type=int, default=100)
    parser.add_argument('-model_save_interval', type=int, default=5)
    parser.add_argument('-batch_size', type=int, default=128)
    parser.add_argument('-word_vector', type=str, default='')
    #TODO Feature parser.add_argument('-resume_training', type=str)
    #TODO Feature parser.add_argument('-language_only', type=bool, default=False)
    args = parser.parse_args()

    word_vec_dim = 300
    img_dim = 4096
    max_len = 30
    nb_classes = 1000

    # get the data
    questions_train = open('../data/preprocessed/questions_train2014.txt', 'r').read().decode('utf8').splitlines()
    questions_lengths_train = open('../data/preprocessed/questions_lengths_train2014.txt', 'r').read().decode('utf8').splitlines()
    answers_train = open('../data/preprocessed/answers_train2014_modal.txt', 'r').read().decode('utf8').splitlines()
    images_train = open('../data/preprocessed/images_train2014.txt', 'r').read().decode('utf8').splitlines()
    vgg_model_path = '../features/coco/vgg_feats.mat'
    max_answers = nb_classes
    questions_train, answers_train, images_train = selectFrequentAnswers(questions_train, answers_train, images_train, max_answers)
    questions_lengths_train, questions_train, answers_train, images_train = (list(t) for t in zip(*sorted(zip(questions_lengths_train, questions_train, answers_train, images_train))))

    # encode the remaining answers
    labelencoder = preprocessing.LabelEncoder()
    labelencoder.fit(answers_train)
    nb_classes = len(list(labelencoder.classes_))
    joblib.dump(labelencoder, '../models/labelencoder.pkl')

    image_model = Sequential()
    image_model.add(Reshape(input_shape=(img_dim,), dims=(img_dim,)))

    language_model = Sequential()
    if args.num_hidden_layers_lstm == 1:
        language_model.add(LSTM(output_dim=args.num_hidden_units_lstm, return_sequences=False, input_shape=(max_len, word_vec_dim)))
    else:
        language_model.add(LSTM(output_dim=args.num_hidden_units_lstm, return_sequences=True, input_shape=(max_len, word_vec_dim)))
        for i in xrange(args.num_hidden_layers_lstm - 2):
            language_model.add(LSTM(output_dim=args.num_hidden_units_lstm, return_sequences=True))
        language_model.add(LSTM(output_dim=args.num_hidden_units_lstm, return_sequences=False))

    model = Sequential()
    model.add(Merge([language_model, image_model], mode='concat', concat_axis=1))
    for i in xrange(args.num_hidden_layers_mlp):
        model.add(Dense(args.num_hidden_units_mlp, init='uniform'))
        model.add(Activation(args.activation_mlp))
        model.add(Dropout(args.dropout))
    model.add(Dense(nb_classes))
    model.add(Activation('softmax'))

    json_string = model.to_json()
    model_file_name = '../models/lstm_1_num_hidden_units_lstm_' + str(args.num_hidden_units_lstm) + \
        '_num_hidden_units_mlp_' + str(args.num_hidden_units_mlp) + '_num_hidden_layers_mlp_' + \
        str(args.num_hidden_layers_mlp) + '_num_hidden_layers_lstm_' + str(args.num_hidden_layers_lstm)
    open(model_file_name + '.json', 'w').write(json_string)

    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    print 'Compilation done'

    features_struct = scipy.io.loadmat(vgg_model_path)
    VGGfeatures = features_struct['feats']
    print 'loaded vgg features'
    image_ids = open('../features/coco_vgg_IDMap.txt').read().splitlines()
    img_map = {}
    for ids in image_ids:
        id_split = ids.split()
        img_map[id_split[0]] = int(id_split[1])

    # choose the word vectors: the default is Goldberg's word2vec vectors, but GloVe is preferred
    if args.word_vector == 'glove':
        nlp = spacy.load('en', vectors='en_glove_cc_300_1m_vectors')
    else:
        nlp = English()
    print 'loaded ' + args.word_vector + ' word2vec features...'

    ## training
    print 'Training started...'
    for k in xrange(args.num_epochs):
        progbar = generic_utils.Progbar(len(questions_train))
        for qu_batch, an_batch, im_batch in zip(grouper(questions_train, args.batch_size, fillvalue=questions_train[-1]),
                                                grouper(answers_train, args.batch_size, fillvalue=answers_train[-1]),
                                                grouper(images_train, args.batch_size, fillvalue=images_train[-1])):
            timesteps = len(nlp(qu_batch[-1]))  # questions sorted in descending order of length
            X_q_batch = get_questions_tensor_timeseries(qu_batch, nlp, timesteps)
            X_i_batch = get_images_matrix(im_batch, img_map, VGGfeatures)
            Y_batch = get_answers_matrix(an_batch, labelencoder)
            loss = model.train_on_batch([X_q_batch, X_i_batch], Y_batch)
            # fix for the Keras v0.3 issue #9
            progbar.add(args.batch_size, values=[("train loss", loss[0])])
        if k % args.model_save_interval == 0:
            model.save_weights(model_file_name + '_epoch_{:03d}.hdf5'.format(k))

    model.save_weights(model_file_name + '_epoch_{:03d}.hdf5'.format(k))
parser.add_argument('-batch_size', type=int, default=128)
args = parser.parse_args()

questions_train = open('./data/questions_train2014.txt', 'r').read().decode('utf8').splitlines()
answers_train = open('./data/answers_train2014_modal.txt', 'r').read().decode('utf8').splitlines()
images_train = open('./data/images_train2014.txt', 'r').read().decode('utf8').splitlines()
logging.debug("Length of questions_train %d", len(questions_train))
logging.debug("Length of answers_train %d", len(answers_train))
logging.debug("Length of images_train %d", len(images_train))

maxAnswers = 1000
questions_train, answers_train, images_train = selectFrequentAnswers(questions_train, answers_train, images_train, maxAnswers)
logging.debug("Length of the lists after selectFrequentAnswers")
logging.debug("Length of questions_train %d", len(questions_train))
logging.debug("Length of answers_train %d", len(answers_train))
logging.debug("Length of images_train %d", len(images_train))

# generate numerical labels for all the answers in answers_train
labelencoder = preprocessing.LabelEncoder()
labelencoder.fit(answers_train)
nb_classes = len(list(labelencoder.classes_))
joblib.dump(labelencoder, 'labelencoder.pkl')

# TODO Get vectors for each image from Sherlock and load them into an array here
image_ids = open("./id_map.txt").read().splitlines()
id_map = {}
def main():
    print 'Train LSTM encoder + MLP decoder'
    parser = argparse.ArgumentParser()
    parser.add_argument('-num_hidden_units_mlp', type=int, default=1024)
    parser.add_argument('-num_hidden_units_lstm', type=int, default=512)
    parser.add_argument('-num_hidden_layers_mlp', type=int, default=3)
    parser.add_argument('-num_hidden_layers_lstm', type=int, default=1)
    parser.add_argument('-dropout', type=float, default=0.5)
    parser.add_argument('-activation_mlp', type=str, default='tanh')
    parser.add_argument('-num_epochs', type=int, default=100)
    parser.add_argument('-model_save_interval', type=int, default=5)
    parser.add_argument('-batch_size', type=int, default=4096)
    parser.add_argument('-gap_layer_units', type=int, default=1024)
    #TODO Feature parser.add_argument('-resume_training', type=str)
    #TODO Feature parser.add_argument('-language_only', type=bool, default=False)
    args = parser.parse_args()

    word_vec_dim = 300
    img_dim = 4096
    max_len = 30
    nb_classes = 1000

    # get the data
    questions_train = open('../data/preprocessed/questions_train2014.txt', 'r').read().decode('utf8').splitlines()
    questions_lengths_train = open('../data/preprocessed/questions_lengths_train2014.txt', 'r').read().decode('utf8').splitlines()
    answers_train = open('../data/preprocessed/answers_train2014_modal.txt', 'r').read().decode('utf8').splitlines()
    images_train = open('../data/preprocessed/images_train2014.txt', 'r').read().decode('utf8').splitlines()
    answers_train_all = open('../data/preprocessed/answers_train2014_all.txt', 'r').read().decode('utf8').splitlines()
    vgg_model_path = '../features/coco/vgg_feats.mat'
    max_answers = nb_classes
    questions_train, answers_train, answers_train_all, images_train = selectFrequentAnswers(questions_train, answers_train, answers_train_all, images_train, max_answers)
    questions_lengths_train, questions_train, answers_train, answers_train_all, images_train = (list(t) for t in zip(*sorted(zip(questions_lengths_train, questions_train, answers_train, answers_train_all, images_train))))

    questions_val = open('../data/preprocessed/questions_val2014.txt', 'r').read().decode('utf8').splitlines()
    questions_lengths_val = open('../data/preprocessed/questions_lengths_val2014.txt', 'r').read().decode('utf8').splitlines()
    answers_val = open('../data/preprocessed/answers_val2014_all.txt', 'r').read().decode('utf8').splitlines()
    images_val = open('../data/preprocessed/images_val2014_all.txt', 'r').read().decode('utf8').splitlines()
    vgg_model_path = '../features/coco/vgg_feats.mat'
    questions_lengths_val, questions_val, answers_val, images_val = (list(t) for t in zip(*sorted(zip(questions_lengths_val, questions_val, answers_val, images_val))))

    # encode the remaining answers
    labelencoder = preprocessing.LabelEncoder()
    labelencoder.fit(answers_train)
    nb_classes = len(list(labelencoder.classes_))
    joblib.dump(labelencoder, '../models/labelencoder.pkl')

    image_input = Input(shape=(img_dim,), name='image_input')
    # image_gap = Dense(args.gap_layer_units, activation=args.activation_mlp)(image_input)
    language_input = Input(shape=(None, word_vec_dim), name='language_input')
    lstm_out = LSTM(args.num_hidden_units_lstm)(language_input)
    # lstm_gap = Dense(args.gap_layer_units, activation=args.activation_mlp)(lstm_out)
    # x = keras.layers.concatenate([lstm_gap, image_gap])
    x = keras.layers.concatenate([lstm_out, image_input])
    x = Dense(1024, activation=args.activation_mlp)(x)
    x = Dropout(args.dropout)(x)
    x = Dense(512, activation=args.activation_mlp)(x)
    x = Dropout(args.dropout)(x)
    x = Dense(256, activation=args.activation_mlp)(x)
    x = Dropout(args.dropout)(x)
    main_output = Dense(nb_classes, activation='softmax', name='main_output')(x)
    model = Model(inputs=[language_input, image_input], outputs=[main_output])

    # args.model = '../models/lstm_1_num_hidden_units_lstm_512_num_hidden_units_mlp_1024_num_hidden_layers_mlp_3_num_hidden_layers_lstm_1.json'
    # args.weights = '../models/lstm_1_num_hidden_units_lstm_512_num_hidden_units_mlp_1024_num_hidden_layers_mlp_3_num_hidden_layers_lstm_1_epoch_100.hdf5'
    # model = model_from_json(open(args.model).read())
    # model.load_weights(args.weights)

    json_string = model.to_json()
    model_file_name = '../models/lstm_1_num_hidden_units_lstm_' + str(args.num_hidden_units_lstm) + \
        '_num_hidden_units_mlp_' + str(args.num_hidden_units_mlp) + '_num_hidden_layers_mlp_' + \
        str(args.num_hidden_layers_mlp) + '_num_hidden_layers_lstm_' + str(args.num_hidden_layers_lstm)
    results_path = '../results/lstm_decoder_1_num_hidden_units_lstm_' + str(args.num_hidden_units_lstm) + \
        '_num_hidden_units_mlp_' + str(args.num_hidden_units_mlp) + '_num_hidden_layers_mlp_' + \
        str(args.num_hidden_layers_mlp) + '_num_hidden_layers_lstm_' + str(args.num_hidden_layers_lstm)
    open(model_file_name + '.json', 'w').write(json_string)

    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    print 'Compilation done'

    features_struct = scipy.io.loadmat(vgg_model_path)
    VGGfeatures = features_struct['feats']
    print 'loaded vgg features'
    image_ids = open('../features/coco_vgg_IDMap.txt').read().splitlines()
    img_map = {}
    for ids in image_ids:
        id_split = ids.split()
        img_map[id_split[0]] = int(id_split[1])

    nlp = English()
    print 'loaded word2vec features...'

    ## training
    print 'Training started...'
    Acc_train = [0] * args.num_epochs
    Acc_val = [0] * args.num_epochs
    loss_list = [0] * args.num_epochs
    f1 = open('../results/loss_accuracy_lstm_encoder.txt', 'a')
    f1.write(model_file_name + '\n')

    for k in xrange(args.num_epochs):
        print str(400 + k + 1) + 'th Iteration ...'
        progbar = generic_utils.Progbar(len(questions_train))
        loss_sum = 0
        it = 0
        for qu_batch, an_batch, im_batch in zip(grouper(questions_train, args.batch_size, fillvalue=questions_train[-1]),
                                                grouper(answers_train, args.batch_size, fillvalue=answers_train[-1]),
                                                grouper(images_train, args.batch_size, fillvalue=images_train[-1])):
            timesteps = len(nlp(qu_batch[-1]))  # questions sorted in descending order of length
            X_q_batch = get_questions_tensor_timeseries(qu_batch, nlp, timesteps)
            X_i_batch = get_images_matrix(im_batch, img_map, VGGfeatures)
            X_i_batch_normalized = preprocessing.normalize(X_i_batch, norm='l2')
            # print X_i_batch.shape, X_q_batch.shape
            Y_batch = get_answers_matrix(an_batch, labelencoder)
            loss = model.train_on_batch([X_q_batch, X_i_batch_normalized], Y_batch)
            progbar.add(args.batch_size, values=[("train loss", loss)])
            it += 1
            loss_sum += loss
        print " " + str(loss_sum / float(it))

        if (k + 1) % args.model_save_interval == 0:
            model.save_weights(model_file_name + '_epoch_{:03d}.hdf5'.format(k + 1))
        loss_list[k] = loss_sum / float(it)
        f1.write(str(loss_list[k]) + ' ')

        # print ' Results on Training set: '
        # Acc_train[k] = Validation_LSTM_encoder(model, questions_train, answers_train_all, images_train, img_map, VGGfeatures, labelencoder,
        #                                        args.batch_size, nlp, nb_classes, results_path + '_train', model_file_name + '_train')
        print ' Results on Validation set: '
        Acc_val[k] = Validation_LSTM_encoder(model, questions_val, answers_val, images_val, img_map, VGGfeatures, labelencoder,
                                             args.batch_size, nlp, nb_classes, results_path, model_file_name)
        f1.write(str(Acc_val[k]) + '\n')
    f1.close()

    plt.figure(1)
    plt.xlabel('Iterations')
    plt.ylabel('Accuracy')
    plt.title('Accuracy on Training and Validation set')
    # plt.plot(range(args.num_epochs), Acc_train, 'b-', label='Accuracy on Training set')
    # plt.hold(True)
    plt.plot(range(args.num_epochs), Acc_val, 'r--', label='Accuracy on Validation set')
    plt.legend(loc='lower right')
    plt.savefig('accuracy_train_val.png')

    plt.figure(2)
    plt.xlabel('Iterations')
    plt.ylabel('Loss')
    plt.title('Convergence curve')
    plt.plot(range(args.num_epochs), loss_list, 'r--')
    plt.savefig('Convergence_curve.png')

    model.save_weights(model_file_name + '_epoch_{:03d}.hdf5'.format(k + 1))
def main():
    print 'Train MLP'
    parser = argparse.ArgumentParser()
    parser.add_argument('-featureType', type=str, default='BoW')  # BoW, WordsGlove, SentGlove
    parser.add_argument('-num_hidden_units', type=int, default=1024)
    parser.add_argument('-num_hidden_layers', type=int, default=3)
    parser.add_argument('-dropout', type=float, default=0.5)
    parser.add_argument('-activation', type=str, default='tanh')
    parser.add_argument('-language_only', type=bool, default=False)
    parser.add_argument('-num_epochs', type=int, default=2000)
    parser.add_argument('-model_save_interval', type=int, default=10)
    parser.add_argument('-batch_size', type=int, default=2048)
    parser.add_argument('-num_top_all_words', type=int, default=1000)
    parser.add_argument('-num_top_start_words', type=int, default=10)
    parser.add_argument('-num_start_words', type=int, default=3)
    args = parser.parse_args()

    questions_train = open('../data/preprocessed/questions_train2014.txt', 'r').read().decode('utf8').splitlines()
    answers_train = open('../data/preprocessed/answers_train2014_modal.txt', 'r').read().decode('utf8').splitlines()
    answers_train_all = open('../data/preprocessed/answers_train2014_all.txt', 'r').read().decode('utf8').splitlines()
    images_train = open('../data/preprocessed/images_train2014.txt', 'r').read().decode('utf8').splitlines()
    vgg_model_path = '../features/coco/vgg_feats.mat'
    maxAnswers = 1000
    questions_train, answers_train, answers_train_all, images_train = selectFrequentAnswers(questions_train, answers_train, answers_train_all, images_train, maxAnswers)
    # print [answers_train.count(answers_train[i]) for i in range(1000)]
    print max([answers_train.count(answers_train[i]) for i in range(1000)])
    print min([answers_train.count(answers_train[i]) for i in range(1000)])
    print np.mean([answers_train.count(answers_train[i]) for i in range(1000)])

    questions_val = open('../data/preprocessed/questions_val2014.txt', 'r').read().decode('utf8').splitlines()
    questions_lengths_val = open('../data/preprocessed/questions_lengths_val2014.txt', 'r').read().decode('utf8').splitlines()
    answers_val = open('../data/preprocessed/answers_val2014_all.txt', 'r').read().decode('utf8').splitlines()
    images_val = open('../data/preprocessed/images_val2014_all.txt', 'r').read().decode('utf8').splitlines()
    vgg_model_path = '../features/coco/vgg_feats.mat'

    # encode the remaining answers
    labelencoder = preprocessing.LabelEncoder()
    labelencoder.fit(answers_train)
    nb_classes = len(list(labelencoder.classes_))
    joblib.dump(labelencoder, '../models/labelencoder.pkl')

    features_struct = scipy.io.loadmat(vgg_model_path)
    VGGfeatures = features_struct['feats']
    print 'loaded vgg features'
    image_ids = open('../features/coco_vgg_IDMap.txt').read().splitlines()
    img_map = {}
    for ids in image_ids:
        id_split = ids.split()
        img_map[id_split[0]] = int(id_split[1])

    if args.featureType == 'WordsGlove' or args.featureType == 'SentGlove':
        nlp = English()
        print 'loaded word2vec features'
    elif args.featureType == 'BoW':
        num_top_all_words = args.num_top_all_words
        num_top_start_words = args.num_top_start_words
        num_start_words = args.num_start_words
        train_question_file = '../data/preprocessed/questions_train2014.txt'
        vectorizers_list = computeBoWfeatures(num_top_all_words, num_top_start_words, num_start_words, train_question_file)
        # print len(vectorizers_list)
        print 'computed BoW features'

    img_dim = 4096
    if args.featureType == 'WordsGlove' or args.featureType == 'SentGlove':
        word_vec_dim = 300
    elif args.featureType == 'BoW':
        word_vec_dim = num_top_all_words + num_top_start_words * num_start_words

    model = Sequential()
    if args.language_only:
        model.add(Dense(args.num_hidden_units, input_dim=word_vec_dim))
    else:
        model.add(Dense(1024, activation=args.activation, input_dim=img_dim + word_vec_dim))
    model.add(Dropout(args.dropout))
    model.add(Dense(1024, activation=args.activation))
    model.add(Dropout(args.dropout))
    model.add(Dense(1024, activation=args.activation))
    model.add(Dropout(args.dropout))
    # for i in xrange(args.num_hidden_layers - 1):
    #     model.add(Dense(args.num_hidden_units, activation=args.activation))
    #     if args.dropout > 0:
    #         model.add(Dropout(args.dropout))
    model.add(Dense(nb_classes, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    print 'Compilation done...'

    json_string = model.to_json()
    if args.language_only:
        model_file_name = '../models/' + args.featureType + '_mlp_language_only_num_hidden_units_' \
            + str(args.num_hidden_units) + '_num_hidden_layers_' + str(args.num_hidden_layers)
    else:
        model_file_name = '../models/' + args.featureType + '_mlp_num_hidden_units_' \
            + str(args.num_hidden_units) + '_num_hidden_layers_' + str(args.num_hidden_layers)
    if args.language_only:
        results_path = '../results/' + args.featureType + '_mlp_language_only_num_hidden_units_' \
            + str(args.num_hidden_units) + '_num_hidden_layers_' + str(args.num_hidden_layers)
    else:
        results_path = '../results/' + args.featureType + '_mlp_num_hidden_units_' \
            + str(args.num_hidden_units) + '_num_hidden_layers_' + str(args.num_hidden_layers)
    open(model_file_name + '.json', 'w').write(json_string)

    Acc_train = [0] * args.num_epochs
    Acc_val = [0] * args.num_epochs
    loss_list = [0] * args.num_epochs

    index_shuf = range(len(questions_train))
    shuffle(index_shuf)
    questions_train = [questions_train[i] for i in index_shuf]
    answers_train_all = [answers_train_all[i] for i in index_shuf]
    answers_train = [answers_train[i] for i in index_shuf]
    images_train = [images_train[i] for i in index_shuf]

    print 'Training started...'
    f1 = open('../results/loss_accuracy_mlp' + args.featureType + '.txt', 'a')
    f1.write(model_file_name + '\n')
    for k in xrange(args.num_epochs):
        print str(k + 1) + 'th Iteration'
        # shuffle the data points before going through them
        progbar = generic_utils.Progbar(len(questions_train))
        for qu_batch, an_batch, im_batch in zip(grouper(questions_train, args.batch_size, fillvalue=questions_train[-1]),
                                                grouper(answers_train, args.batch_size, fillvalue=answers_train[-1]),
                                                grouper(images_train, args.batch_size, fillvalue=images_train[-1])):
            if args.featureType == 'WordsGlove':
                X_q_batch = get_questions_matrix_sum(qu_batch, nlp)
            elif args.featureType == 'SentGlove':
                X_q_batch = get_questions_matrix_sentGlove(qu_batch, nlp)
            elif args.featureType == 'BoW':
                X_q_batch = get_questions_BoW(qu_batch, vectorizers_list)
            # print np.shape(X_q_batch)
            if args.language_only:
                X_batch = X_q_batch
            else:
                X_i_batch = get_images_matrix(im_batch, img_map, VGGfeatures)
                X_i_batch_normalized = preprocessing.normalize(X_i_batch, norm='l2')
                X_batch = np.hstack((X_q_batch, X_i_batch_normalized))
            Y_batch = get_answers_matrix(an_batch, labelencoder)
            loss = model.train_on_batch(X_batch, Y_batch)
            progbar.add(args.batch_size, values=[("train loss", loss)])
        if (k + 1) % args.model_save_interval == 0:
            model.save_weights(model_file_name + '_epoch_{:02d}.hdf5'.format(k + 1))
        loss_list[k] = loss
        f1.write(str(loss_list[k]) + ' ')

        # print ' Results on Training set: '
        # Acc_train[k] = ValidationMLP(k, model, questions_train, answers_train_all, images_train, img_map, VGGfeatures, labelencoder,
        #                              args.batch_size, nb_classes, results_path + '_train', model_file_name + '_train', args.featureType, args.language_only)
        print ' Results on Validation set: '
        Acc_val[k] = ValidationMLP(k, model, questions_val, answers_val, images_val, img_map, VGGfeatures, labelencoder,
                                   args.batch_size, nb_classes, results_path, model_file_name, args.featureType, args.language_only)
        f1.write(str(Acc_val[k]) + '\n')
    f1.close()

    plt.figure(1)
    plt.xlabel('Iterations')
    plt.ylabel('Accuracy')
    plt.title('Accuracy on Training and Validation set')
    # plt.plot(range(args.num_epochs), Acc_train, 'b-', label='Accuracy on Training set')
    # plt.hold(True)
    plt.plot(range(args.num_epochs), Acc_val, 'r--', label='Accuracy on Validation set')
    plt.legend(loc='lower right')
    plt.savefig('../pic/accuracy_train_val' + args.featureType + '.png')

    plt.figure(2)
    plt.xlabel('Iterations')
    plt.ylabel('Loss')
    plt.title('Convergence curve')
    plt.plot(range(args.num_epochs), loss_list, 'r--')
    plt.savefig('../pic/Convergence_curve' + args.featureType + '.png')

    model.save_weights(model_file_name + '_epoch_{:02d}.hdf5'.format(k + 1))
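# computeBoWfeatures and get_questions_BoW are not defined in this file. A hedged sketch of
# one way to reproduce the feature dimensionality used above (num_top_all_words counts over
# the whole question plus num_top_start_words counts for each of the first num_start_words
# word positions), using scikit-learn's CountVectorizer; the actual implementation may differ.
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer

def computeBoWfeatures(num_top_all_words, num_top_start_words, num_start_words, train_question_file):
    questions = open(train_question_file, 'r').read().decode('utf8').splitlines()
    # one vectorizer over full questions, plus one per start-word position
    vectorizers_list = [CountVectorizer(max_features=num_top_all_words).fit(questions)]
    for pos in xrange(num_start_words):
        words_at_pos = [q.split()[pos] if len(q.split()) > pos else '' for q in questions]
        vectorizers_list.append(CountVectorizer(max_features=num_top_start_words).fit(words_at_pos))
    return vectorizers_list

def get_questions_BoW(questions, vectorizers_list):
    feats = [vectorizers_list[0].transform(questions).toarray()]
    for pos, vec in enumerate(vectorizers_list[1:]):
        words_at_pos = [q.split()[pos] if len(q.split()) > pos else '' for q in questions]
        feats.append(vec.transform(words_at_pos).toarray())
    return np.hstack(feats)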