Exemple #1
0
def main():
    start_time = time.time()

    parser = argparse.ArgumentParser(prog='trainLSTM_MLP.py',
            description='Train LSTM-MLP model for visual question answering')
    parser.add_argument('--mlp-hidden-units', type=int, default=1024, metavar='<mlp-hidden-units>')
    parser.add_argument('--lstm-hidden-units', type=int, default=512, metavar='<lstm-hidden-units>')
    parser.add_argument('--mlp-hidden-layers', type=int, default=3, metavar='<mlp-hidden-layers>')
    parser.add_argument('--lstm-hidden-layers', type=int, default=1, metavar='<lstm-hidden-layers>')
    parser.add_argument('--dropout', type=float, default=0.5, metavar='<dropout-rate>')
    parser.add_argument('--mlp-activation', type=str, default='tanh', metavar='<activation-function>')
    parser.add_argument('--num-epochs', type=int, default=100, metavar='<num-epochs>')
    parser.add_argument('--batch-size', type=int, default=128, metavar='<batch-size>')
    parser.add_argument('--learning-rate', type=float, default=0.001, metavar='<learning-rate>')
    parser.add_argument('--dev-accuracy-path', type=str, required=True, metavar='<accuracy-path>')
    args = parser.parse_args()

    word_vec_dim = 300
    img_dim = 4096
    max_len = 30
    ######################
    #      Load Data     #
    ######################
    data_dir = '/home/mlds/data/0.05_val/'

    print('Loading data...')

    train_id_pairs, train_image_ids = LoadIds('train', data_dir)
    dev_id_pairs, dev_image_ids = LoadIds('dev', data_dir)

    train_questions = LoadQuestions('train', data_dir)
    dev_questions = LoadQuestions('dev', data_dir)

    train_choices = LoadChoices('train', data_dir)
    dev_choices = LoadChoices('dev', data_dir)

    train_answers = LoadAnswers('train', data_dir)
    dev_answers = LoadAnswers('dev', data_dir)

    print('Finished loading data.')
    print('Time: %f s' % (time.time()-start_time))

    ######################
    # Model Descriptions #
    ######################
    print('Generating and compiling model...')

    # image model (CNN features)
    image_model = Sequential()
    image_model.add(Reshape(
        input_shape=(img_dim,), dims=(img_dim,)
        ))

    # language model (LSTM)
    language_model = Sequential()
    if args.lstm_hidden_layers == 1:
        language_model.add(LSTM(
            output_dim=args.lstm_hidden_units, return_sequences=False, input_shape=(max_len, word_vec_dim)
            ))
    else:
        language_model.add(LSTM(
            output_dim=args.lstm_hidden_units, return_sequences=True, input_shape=(max_len, word_vec_dim)
            ))
        for i in range(args.lstm_hidden_layers-2):
            language_model.add(LSTM(
                output_dim=args.lstm_hidden_units, return_sequences=True
                ))
        language_model.add(LSTM(
            output_dim=args.lstm_hidden_units, return_sequences=False
            ))

    # feedforward model (MLP)
    model = Sequential()
    model.add(Merge(
        [language_model, image_model], mode='concat', concat_axis=1
        ))
    for i in range(args.mlp_hidden_layers):
        model.add(Dense(
            args.mlp_hidden_units, init='uniform'
            ))
        model.add(Activation(args.mlp_activation))
        model.add(Dropout(args.dropout))
    model.add(Dense(word_vec_dim))
    #model.add(Activation('softmax'))

    json_string = model.to_json()
    model_filename = 'models/vgg_lstm_units_%i_layers_%i_mlp_units_%i_layers_%i_%s_lr%.1e_dropout%.2f' % (args.lstm_hidden_units, args.lstm_hidden_layers, args.mlp_hidden_units, args.mlp_hidden_layers, args.mlp_activation, args.learning_rate, args.dropout)
    #model_filename = 'models/vgg_lstm_units_%i_layers_%i_mlp_units_%i_layers_%i_%s_lr%.1e_dropout%.2f_loss_cosine' % (args.lstm_hidden_units, args.lstm_hidden_layers, args.mlp_hidden_units, args.mlp_hidden_layers, args.mlp_activation, args.learning_rate, args.dropout)
    open(model_filename + '.json', 'w').write(json_string)

    # loss and optimizer
    rmsprop = RMSprop(lr=args.learning_rate)
    #model.compile(loss='categorical_crossentropy', optimizer=rmsprop)
    model.compile(loss=Loss, optimizer=rmsprop)
    print('Compilation finished.')
    print('Time: %f s' % (time.time()-start_time))

    ########################################
    #  Load CNN Features and Word Vectors  #
    ########################################

    # load VGG features
    print('Loading VGG features...')
    VGG_features, img_map = LoadVGGFeatures()
    print('VGG features loaded')
    print('Time: %f s' % (time.time()-start_time))

    # load GloVe vectors
    print('Loading GloVe vectors...')
    word_embedding, word_map = LoadGloVe()
    print('GloVe vectors loaded')
    print('Time: %f s' % (time.time()-start_time))

    ######################
    #    Make Batches    #
    ######################

    print('Making batches...')

    # training batches
    train_question_batches = [ b for b in MakeBatches(train_questions, args.batch_size, fillvalue=train_questions[-1]) ]
    train_answer_batches = [ b for b in MakeBatches(train_answers['toks'], args.batch_size, fillvalue=train_answers['toks'][-1]) ]
    train_image_batches = [ b for b in MakeBatches(train_image_ids, args.batch_size, fillvalue=train_image_ids[-1]) ]
    train_indices = list(range(len(train_question_batches)))

    # validation batches
    dev_question_batches = [ b for b in MakeBatches(dev_questions, args.batch_size, fillvalue=dev_questions[-1]) ]
    dev_answer_batches = [ b for b in MakeBatches(dev_answers['labs'], args.batch_size, fillvalue=dev_answers['labs'][-1]) ]
    dev_choice_batches = [ b for b in MakeBatches(dev_choices, args.batch_size, fillvalue=dev_choices[-1]) ]
    dev_image_batches = [ b for b in MakeBatches(dev_image_ids, args.batch_size, fillvalue=dev_image_ids[-1]) ]

    print('Finished making batches.')
    print('Time: %f s' % (time.time()-start_time))


    ######################
    #      Training      #
    ######################

    acc_file = open(args.dev_accuracy_path, 'w')
    dev_accs = []
    max_acc = -1
    max_acc_epoch = -1

    # define interrupt handler
    def PrintDevAcc():
        print('Max validation accuracy epoch: %i' % max_acc_epoch)
        print(dev_accs)

    def InterruptHandler(sig, frame):
        print(str(sig))
        PrintDevAcc()
        sys.exit(-1)

    signal.signal(signal.SIGINT, InterruptHandler)
    signal.signal(signal.SIGTERM, InterruptHandler)

    # print training information
    print('-'*80)
    print('Training Information')
    print('# of LSTM hidden units: %i' % args.lstm_hidden_units)
    print('# of LSTM hidden layers: %i' % args.lstm_hidden_layers)
    print('# of MLP hidden units: %i' % args.mlp_hidden_units)
    print('# of MLP hidden layers: %i' % args.mlp_hidden_layers)
    print('Dropout: %f' % args.dropout)
    print('MLP activation function: %s' % args.mlp_activation)
    print('# of training epochs: %i' % args.num_epochs)
    print('Batch size: %i' % args.batch_size)
    print('Learning rate: %f' % args.learning_rate)
    print('# of train questions: %i' % len(train_questions))
    print('# of dev questions: %i' % len(dev_questions))
    print('-'*80)
    acc_file.write('-'*80 + '\n')
    acc_file.write('Training Information\n')
    acc_file.write('# of LSTM hidden units: %i\n' % args.lstm_hidden_units)
    acc_file.write('# of LSTM hidden layers: %i\n' % args.lstm_hidden_layers)
    acc_file.write('# of MLP hidden units: %i\n' % args.mlp_hidden_units)
    acc_file.write('# of MLP hidden layers: %i\n' % args.mlp_hidden_layers)
    acc_file.write('Dropout: %f\n' % args.dropout)
    acc_file.write('MLP activation function: %s\n' % args.mlp_activation)
    acc_file.write('# of training epochs: %i\n' % args.num_epochs)
    acc_file.write('Batch size: %i\n' % args.batch_size)
    acc_file.write('Learning rate: %f\n' % args.learning_rate)
    acc_file.write('# of train questions: %i\n' % len(train_questions))
    acc_file.write('# of dev questions: %i\n' % len(dev_questions))
    acc_file.write('-'*80 + '\n')

    # start training
    print('Training started...')
    for k in range(args.num_epochs):
        print('-'*80)
        print('Epoch %i' % (k+1))
        progbar = generic_utils.Progbar(len(train_indices)*args.batch_size)
        # shuffle batch indices
        random.shuffle(train_indices)
        for i in train_indices:
            X_question_batch = GetQuestionsTensor(train_question_batches[i], word_embedding, word_map)
            X_image_batch = GetImagesMatrix(train_image_batches[i], img_map, VGG_features)
            Y_answer_batch = GetAnswersMatrix(train_answer_batches[i], word_embedding, word_map)
            loss = model.train_on_batch([X_question_batch, X_image_batch], Y_answer_batch)
            loss = loss[0].tolist()
            progbar.add(args.batch_size, values=[('train loss', loss)])
        print('Time: %f s' % (time.time()-start_time))

        # evaluate on dev set
        pbar = generic_utils.Progbar(len(dev_question_batches)*args.batch_size)

        dev_correct = 0

        # feed forward
        for i in range(len(dev_question_batches)):
            X_question_batch = GetQuestionsTensor(dev_question_batches[i], word_embedding, word_map)
            X_image_batch = GetImagesMatrix(dev_image_batches[i], img_map, VGG_features)
            prob = model.predict_proba([X_question_batch, X_image_batch], args.batch_size, verbose=0)

            # get word vecs of choices
            choice_feats = GetChoicesTensor(dev_choice_batches[i], word_embedding, word_map)
            similarity = np.zeros((5, args.batch_size), float)
            # calculate cosine distances
            for j in range(5):
                similarity[j] = np.diag(cosine_similarity(prob, choice_feats[j]))
            # take argmax of cosine distances
            pred = np.argmax(similarity, axis=0) + 1

            if i != (len(dev_question_batches)-1):
                dev_correct += np.count_nonzero(dev_answer_batches[i]==pred)
            else:
                num_padding = args.batch_size * len(dev_question_batches) - len(dev_questions)
                last_idx = args.batch_size - num_padding
                dev_correct += np.count_nonzero(dev_answer_batches[:last_idx]==pred[:last_idx])
            pbar.add(args.batch_size)

        dev_acc = float(dev_correct)/len(dev_questions)
        dev_accs.append(dev_acc)
        print('Validation Accuracy: %f' % dev_acc)
        print('Time: %f s' % (time.time()-start_time))

        if dev_acc > max_acc:
            max_acc = dev_acc
            max_acc_epoch = k
            model.save_weights(model_filename + '_best.hdf5', overwrite=True)

    #model.save_weights(model_filename + '_epoch_{:03d}.hdf5'.format(k+1))
    print(dev_accs)
    for acc in dev_accs:
        acc_file.write('%f\n' % acc)
    print('Best validation accuracy: %f; epoch#%i' % (max_acc,(max_acc_epoch+1)))
    acc_file.write('Best validation accuracy: %f; epoch#%i\n' % (max_acc,(max_acc_epoch+1)))
    print('Training finished.')
    acc_file.write('Training finished.\n')
    print('Time: %f s' % (time.time()-start_time))
    acc_file.write('Time: %f s\n' % (time.time()-start_time))
    acc_file.close()
def main():
    start_time = time.time()
    signal.signal(signal.SIGINT, InterruptHandler)
    #signal.signal(signal.SIGKILL, InterruptHandler)
    signal.signal(signal.SIGTERM, InterruptHandler)

    parser = argparse.ArgumentParser(
        prog='trainMLP.py',
        description='Train MLP model for visual question answering')
    parser.add_argument('--mlp-hidden-units',
                        type=int,
                        default=1024,
                        metavar='<mlp-hidden-units>')
    parser.add_argument('--mlp-hidden-layers',
                        type=int,
                        default=3,
                        metavar='<mlp-hidden-layers>')
    parser.add_argument('--dropout',
                        type=float,
                        default=0.5,
                        metavar='<dropout-rate>')
    parser.add_argument('--mlp-activation',
                        type=str,
                        default='relu',
                        metavar='<activation-function>')
    parser.add_argument('--num-epochs',
                        type=int,
                        default=100,
                        metavar='<num-epochs>')
    parser.add_argument('--model-save-interval',
                        type=int,
                        default=5,
                        metavar='<interval>')
    parser.add_argument('--batch-size',
                        type=int,
                        default=128,
                        metavar='<batch-size>')
    args = parser.parse_args()

    word_vec_dim = 300
    img_dim = 4096
    ######################
    #      Load Data     #
    ######################

    print('Loading data...')

    train_id_pairs, train_image_ids = LoadIds('train')
    dev_id_pairs, dev_image_ids = LoadIds('dev')

    train_choices = LoadChoices('train')
    dev_choices = LoadChoices('dev')

    train_answers = LoadAnswers('train')
    dev_answers = LoadAnswers('dev')

    print('Finished loading data.')
    print('Time: %f s' % (time.time() - start_time))

    print('-' * 100, file=sys.stderr)
    print('Training Information', file=sys.stderr)
    print('# of MLP hidden units: %i' % args.mlp_hidden_units, file=sys.stderr)
    print('# of MLP hidden layers: %i' % args.mlp_hidden_layers,
          file=sys.stderr)
    print('Dropout: %f' % args.dropout, file=sys.stderr)
    print('MLP activation function: %s' % args.mlp_activation, file=sys.stderr)
    print('# of training epochs: %i' % args.num_epochs, file=sys.stderr)
    print('Batch size: %i' % args.batch_size, file=sys.stderr)
    print('# of train images: %i' % len(train_image_ids), file=sys.stderr)
    print('# of dev images: %i' % len(dev_image_ids), file=sys.stderr)
    print('-' * 100, file=sys.stderr)

    ######################
    # Model Descriptions #
    ######################

    # MLP model
    model = Sequential()
    model.add(Dense(output_dim=args.mlp_hidden_units, input_dim=img_dim))
    model.add(Activation(args.mlp_activation))
    model.add(Dropout(args.dropout))
    for i in range(args.mlp_hidden_layers - 1):
        model.add(Dense(args.mlp_hidden_units))
        model.add(Activation(args.mlp_activation))
        model.add(Dropout(args.dropout))
    model.add(Dense(word_vec_dim))
    model.add(Activation('softmax'))

    json_string = model.to_json()
    model_filename = 'models/mlp_units_%i_layers_%i' % (args.mlp_hidden_units,
                                                        args.mlp_hidden_layers)
    open(model_filename + '.json', 'w').write(json_string)

    # loss and optimizer
    model.compile(loss='categorical_crossentropy', optimizer='adagrad')
    print('Compilation finished.')
    print('Time: %f s' % (time.time() - start_time))

    ########################################
    #  Load CNN Features and Word Vectors  #
    ########################################

    # load VGG features
    print('Loading VGG features...')
    VGG_features, img_map = LoadVGGFeatures()
    print('VGG features loaded')
    print('Time: %f s' % (time.time() - start_time))

    # load GloVe vectors
    print('Loading GloVe vectors...')
    word_embedding, word_map = LoadGloVe()
    print('GloVe vectors loaded')
    print('Time: %f s' % (time.time() - start_time))

    ######################
    #    Make Batches    #
    ######################

    print('Making batches...')

    # training batches
    train_answer_batches = [
        b for b in MakeBatches(train_answers['toks'],
                               args.batch_size,
                               fillvalue=train_answers['toks'][-1])
    ]
    train_image_batches = [
        b for b in MakeBatches(
            train_image_ids, args.batch_size, fillvalue=train_image_ids[-1])
    ]
    train_indices = list(range(len(train_image_ids)))

    # validation batches
    dev_answer_batches = [
        b for b in MakeBatches(dev_answers['labs'],
                               args.batch_size,
                               fillvalue=dev_answers['labs'][-1])
    ]
    dev_choice_batches = [
        b for b in MakeBatches(
            dev_choices, args.batch_size, fillvalue=dev_choices[-1])
    ]
    dev_image_batches = [
        b for b in MakeBatches(
            dev_image_ids, args.batch_size, fillvalue=dev_image_ids[-1])
    ]

    print('Finished making batches.')
    print('Time: %f s' % (time.time() - start_time))

    ######################
    #      Training      #
    ######################

    dev_accs = []
    max_acc = -1
    max_acc_epoch = -1

    print('Training started...')
    for k in range(args.num_epochs):
        print('Epoch %i' % (k + 1), file=sys.stderr)
        print('-' * 80)
        print('Epoch %i' % (k + 1))
        progbar = generic_utils.Progbar(len(train_indices) * args.batch_size)
        # shuffle batch indices
        random.shuffle(train_indices)
        for i in train_indices:
            X_image_batch = GetImagesMatrix(train_image_batches[i], img_map,
                                            VGG_features)
            Y_answer_batch = GetAnswersMatrix(train_answer_batches[i],
                                              word_embedding, word_map)
            loss = model.train_on_batch(X_image_batch, Y_answer_batch)
            loss = loss[0].tolist()
            progbar.add(args.batch_size, values=[('train loss', loss)])

        if k % args.model_save_interval == 0:
            model.save_weights(model_filename +
                               '_epoch_{:03d}.hdf5'.format(k + 1),
                               overwrite=True)

        # evaluate on dev set
        widgets = [
            'Evaluating ',
            Percentage(), ' ',
            Bar(marker='#', left='[', right=']'), ' ',
            ETA()
        ]
        pbar = ProgressBar(widgets=widgets, redirect_stdout=True)

        dev_correct = 0

        for i in pbar(range(len(dev_image_batches))):
            # feed forward
            X_image_batch = GetImagesMatrix(dev_image_batches[i], img_map,
                                            VGG_features)
            prob = model.predict_proba(X_image_batch,
                                       args.batch_size,
                                       verbose=0)

            # get word vecs of choices
            choice_feats = GetChoicesTensor(dev_choice_batches[i],
                                            word_embedding, word_map)
            similarity = np.zeros((5, args.batch_size), float)
            # calculate cosine distances
            for j in range(5):
                similarity[j] = np.diag(
                    cosine_similarity(prob, choice_feats[j]))
            # take argmax of cosine distances
            pred = np.argmax(similarity, axis=0) + 1

            dev_correct += np.count_nonzero(dev_answer_batches[i] == pred)

        dev_acc = float(dev_correct) / len(dev_image_ids)
        dev_accs.append(dev_acc)
        print('Validation Accuracy: %f' % dev_acc)
        print('Validation Accuracy: %f' % dev_acc, file=sys.stderr)
        print('Time: %f s' % (time.time() - start_time))
        print('Time: %f s' % (time.time() - start_time), file=sys.stderr)

        if dev_acc > max_acc:
            max_acc = dev_acc
            max_acc_epoch = k
            model.save_weights(model_filename + '_best.hdf5', overwrite=True)

    model.save_weights(model_filename + '_epoch_{:03d}.hdf5'.format(k + 1))
    print(dev_accs, file=sys.stderr)
    print('Best validation accuracy: epoch#%i' % max_acc_epoch)
    print('Training finished.')
    print('Training finished.', file=sys.stderr)
    print('Time: %f s' % (time.time() - start_time))
    print('Time: %f s' % (time.time() - start_time), file=sys.stderr)
def main():
    start_time = time.time()
    signal.signal(signal.SIGINT, InterruptHandler)
    #signal.signal(signal.SIGKILL, InterruptHandler)
    signal.signal(signal.SIGTERM, InterruptHandler)

    parser = argparse.ArgumentParser(
        prog='trainLSTM.py',
        description='Train LSTM model for visual question answering')
    parser.add_argument('--lstm-hidden-units',
                        type=int,
                        default=512,
                        metavar='<lstm-hidden-units>')
    parser.add_argument('--lstm-hidden-layers',
                        type=int,
                        default=1,
                        metavar='<lstm-hidden-layers>')
    #parser.add_argument('--dropout', type=float, default=0.5, metavar='<dropout-rate>')
    parser.add_argument('--num-epochs',
                        type=int,
                        default=100,
                        metavar='<num-epochs>')
    #parser.add_argument('--model-save-interval', type=int, default=5, metavar='<interval>')
    parser.add_argument('--batch-size',
                        type=int,
                        default=128,
                        metavar='<batch-size>')
    args = parser.parse_args()

    Inc_features_dim = 2048
    word_vec_dim = 300
    max_len = 30
    data_dir = '/home/mlds/data/0.2_val/'
    ######################
    #      Load Data     #
    ######################

    print('Loading data...')

    train_id_pairs, train_image_ids = LoadIds('train', data_dir)
    dev_id_pairs, dev_image_ids = LoadIds('dev', data_dir)

    train_questions = LoadQuestions('train', data_dir)
    dev_questions = LoadQuestions('dev', data_dir)

    train_choices = LoadChoices('train', data_dir)
    dev_choices = LoadChoices('dev', data_dir)

    train_answers = LoadAnswers('train', data_dir)
    dev_answers = LoadAnswers('dev', data_dir)

    print('Finished loading data.')
    print('Time: %f s' % (time.time() - start_time))

    print('-' * 100, file=sys.stderr)
    print('Training Information', file=sys.stderr)
    print('# of LSTM hidden units: %i' % args.lstm_hidden_units,
          file=sys.stderr)
    print('# of LSTM hidden layers: %i' % args.lstm_hidden_layers,
          file=sys.stderr)
    #print('Dropout: %f' % args.dropout, file=sys.stderr)
    print('# of training epochs: %i' % args.num_epochs, file=sys.stderr)
    print('Batch size: %i' % args.batch_size, file=sys.stderr)
    print('# of train questions: %i' % len(train_questions), file=sys.stderr)
    print('# of dev questions: %i' % len(dev_questions), file=sys.stderr)
    print('-' * 100, file=sys.stderr)

    ######################
    # Model Descriptions #
    ######################

    # LSTM model
    model = Sequential()
    model.add(
        LSTM(output_dim=args.lstm_hidden_units,
             return_sequences=True,
             input_shape=(max_len, Inc_features_dim + word_vec_dim)))
    for i in range(args.lstm_hidden_layers - 2):
        model.add(
            LSTM(output_dim=args.lstm_hidden_units, return_sequences=True))
    model.add(LSTM(output_dim=word_vec_dim, return_sequences=False))
    model.add(Activation('softmax'))

    json_string = model.to_json()
    model_filename = 'models/lstm_units_%i_layers_%i' % (
        args.lstm_hidden_units, args.lstm_hidden_layers)
    open(model_filename + '.json', 'w').write(json_string)

    # loss and optimizer
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    print('Compilation finished.')
    print('Time: %f s' % (time.time() - start_time))

    ##############################################
    #  Load Word Vectors and Inception Features  #
    ##############################################

    # load GloVe vectors
    print('Loading GloVe vectors...')
    word_embedding, word_map = LoadGloVe()
    print('GloVe vectors loaded')
    print('Time: %f s' % (time.time() - start_time))

    print('Loading Inception features...')
    Inc_features, img_map = LoadInceptionFeatures()
    print('Inception features loaded')
    print('Time: %f s' % (time.time() - start_time))

    ######################
    #    Make Batches    #
    ######################

    print('Making batches...')

    # training batches
    train_question_batches = [
        b for b in MakeBatches(
            train_questions, args.batch_size, fillvalue=train_questions[-1])
    ]
    train_answer_batches = [
        b for b in MakeBatches(train_answers['toks'],
                               args.batch_size,
                               fillvalue=train_answers['toks'][-1])
    ]
    train_image_batches = [
        b for b in MakeBatches(
            train_image_ids, args.batch_size, fillvalue=train_image_ids[-1])
    ]
    train_indices = list(range(len(train_question_batches)))

    # validation batches
    dev_question_batches = [
        b for b in MakeBatches(
            dev_questions, args.batch_size, fillvalue=dev_questions[-1])
    ]
    dev_answer_batches = [
        b for b in MakeBatches(dev_answers['labs'],
                               args.batch_size,
                               fillvalue=dev_answers['labs'][-1])
    ]
    dev_image_batches = [
        b for b in MakeBatches(
            dev_image_ids, args.batch_size, fillvalue=dev_image_ids[-1])
    ]
    dev_choice_batches = [
        b for b in MakeBatches(
            dev_choices, args.batch_size, fillvalue=dev_choices[-1])
    ]

    print('Finished making batches.')
    print('Time: %f s' % (time.time() - start_time))

    ######################
    #      Training      #
    ######################

    dev_accs = []
    max_acc = -1
    max_acc_epoch = -1

    print('Training started...')
    for k in range(args.num_epochs):
        print('Epoch %i' % (k + 1), file=sys.stderr)
        print('-' * 80)
        print('Epoch %i' % (k + 1))
        progbar = generic_utils.Progbar(len(train_indices) * args.batch_size)
        # shuffle batch indices
        random.shuffle(train_indices)
        for i in train_indices:
            X_imgquestion_batch = GetImgQuestionsTensor(
                train_image_batches[i], Inc_features, img_map,
                train_question_batches[i], word_embedding, word_map)
            Y_answer_batch = GetAnswersMatrix(train_answer_batches[i],
                                              word_embedding, word_map)
            loss = model.train_on_batch(X_imgquestion_batch, Y_answer_batch)
            loss = loss[0].tolist()
            progbar.add(args.batch_size, values=[('train loss', loss)])

        #if k % args.model_save_interval == 0:
        #model.save_weights(model_filename + '_epoch_{:03d}.hdf5'.format(k+1), overwrite=True)

        # evaluate on dev set
        progbar = generic_utils.Progbar(
            len(dev_question_batches) * args.batch_size)

        dev_correct = 0

        for i in range(len(dev_question_batches)):
            # feed forward
            X_imgquestion_batch = GetImgQuestionsTensor(
                dev_image_batches[i], Inc_features, img_map,
                dev_question_batches[i], word_embedding, word_map)
            prob = model.predict_proba(X_imgquestion_batch,
                                       args.batch_size,
                                       verbose=0)

            # get word vecs of choices
            choice_feats = GetChoicesTensor(dev_choice_batches[i],
                                            word_embedding, word_map)
            similarity = np.zeros((5, args.batch_size), float)
            # calculate cosine distances
            for j in range(5):
                similarity[j] = np.diag(
                    cosine_similarity(prob, choice_feats[j]))
            # take argmax of cosine distances
            pred = np.argmax(similarity, axis=0) + 1

            dev_correct += np.count_nonzero(dev_answer_batches[i] == pred)
            progbar.add(args.batch_size)

        dev_acc = float(dev_correct) / len(dev_questions)
        dev_accs.append(dev_acc)
        print('Validation Accuracy: %f' % dev_acc)
        print('Validation Accuracy: %f' % dev_acc, file=sys.stderr)
        print('Time: %f s' % (time.time() - start_time))
        print('Time: %f s' % (time.time() - start_time), file=sys.stderr)

        if dev_acc > max_acc:
            max_acc = dev_acc
            max_acc_epoch = k
            model.save_weights(model_filename + '_best.hdf5', overwrite=True)

    #model.save_weights(model_filename + '_epoch_{:03d}.hdf5'.format(k+1))
    print(dev_accs, file=sys.stderr)
    print('Best validation accuracy: epoch#%i' % max_acc_epoch)
    print('Training finished.')
    print('Training finished.', file=sys.stderr)
    print('Time: %f s' % (time.time() - start_time))
    print('Time: %f s' % (time.time() - start_time), file=sys.stderr)
Exemple #4
0
def main():
    start_time = time.time()

    parser = argparse.ArgumentParser(
        prog='trainMemNN.py',
        description='Train MemmNN model for visual question answering')
    parser.add_argument('--mlp-hidden-units',
                        type=int,
                        default=1024,
                        metavar='<mlp-hidden-units>')
    parser.add_argument('--mlp-hidden-layers',
                        type=int,
                        default=3,
                        metavar='<mlp-hidden-layers>')
    parser.add_argument('--mlp-activation',
                        type=str,
                        default='tanh',
                        metavar='<activation-function>')
    parser.add_argument('--emb-dimension',
                        type=int,
                        default=50,
                        metavar='<embedding-dimension>')
    parser.add_argument('--num-epochs',
                        type=int,
                        default=100,
                        metavar='<num-epochs>')
    parser.add_argument('--batch-size',
                        type=int,
                        default=128,
                        metavar='<batch-size>')
    parser.add_argument('--hops', type=int, default=3, metavar='<memnet-hops>')
    parser.add_argument('--learning-rate',
                        type=float,
                        default=0.001,
                        metavar='<learning-rate>')
    parser.add_argument('--dropout',
                        type=float,
                        default=0.2,
                        metavar='<dropout-rate>')
    parser.add_argument('--dev-accuracy-path',
                        type=str,
                        required=True,
                        metavar='<accuracy-path>')
    args = parser.parse_args()

    word_vec_dim = 300
    img_dim = 300
    max_len = 30
    img_feature_num = 125
    ######################
    #      Load Data     #
    ######################
    data_dir = '/home/mlds/data/0.05_val/'

    print('Loading data...')

    train_q_ids, train_image_ids = LoadIds('train', data_dir)
    dev_q_ids, dev_image_ids = LoadIds('dev', data_dir)
    #test_q_ids,test_image_ids = LoadIds('test', data_dir)

    train_questions = LoadQuestions('train', data_dir)
    dev_questions = LoadQuestions('dev', data_dir)

    train_choices = LoadChoices('train', data_dir)
    dev_choices = LoadChoices('dev', data_dir)

    train_answers = LoadAnswers('train', data_dir)
    dev_answers = LoadAnswers('dev', data_dir)

    caption_map = LoadCaptions('train')
    '''
    caption_map_test = LoadCaptions('test')
    maxtrain=-1
    maxdev=-1
    maxtest=-1
    for img_id in train_image_ids:
        sent = caption_map[img_id]
        if len(sent) > maxtrain:
            maxtrain = len(sent)
    for img_id in dev_image_ids:
        sent = caption_map[img_id]
        if len(sent) > maxdev:
            maxdev = len(sent)
    for img_id in test_image_ids:
        sent = caption_map_test[img_id]
        if len(sent) > maxtest:
            maxtest = len(sent)
    print(maxtrain)
    print(maxdev)
    print(maxtest)
    sys.exit()
    '''

    print('Finished loading data.')
    print('Time: %f s' % (time.time() - start_time))

    ######################
    # Model Descriptions #
    ######################
    print('Generating and compiling model...')
    model = CreateGraph(args.emb_dimension, args.hops, args.mlp_activation,
                        args.mlp_hidden_units, args.mlp_hidden_layers,
                        word_vec_dim, img_dim, img_feature_num, args.dropout)

    json_string = model.to_json()
    model_filename = 'models/memNN.mlp_units_%i_layers_%i_%s.emb_dim_%i.hops_%i.lr%.1e.dropout_%.1e' % (
        args.mlp_hidden_units, args.mlp_hidden_layers, args.mlp_activation,
        args.emb_dimension, args.hops, args.learning_rate, args.dropout)
    open(model_filename + '.json', 'w').write(json_string)

    # loss and optimizer
    rmsprop = RMSprop(lr=args.learning_rate)
    #model.compile(loss='categorical_crossentropy', optimizer=rmsprop)
    model.compile(loss={'output': Loss}, optimizer=rmsprop)
    print('Compilation finished.')
    print('Time: %f s' % (time.time() - start_time))

    ########################################
    #  Load CNN Features and Word Vectors  #
    ########################################

    # load VGG features
    '''
    print('Loading VGG features...')
    VGG_features, img_map = LoadVGGFeatures()
    print('VGG features loaded')
    print('Time: %f s' % (time.time()-start_time))
    '''

    # load GloVe vectors
    print('Loading GloVe vectors...')
    word_embedding, word_map = LoadGloVe()
    print('GloVe vectors loaded')
    print('Time: %f s' % (time.time() - start_time))

    ######################
    #    Make Batches    #
    ######################

    print('Making batches...')

    # training batches
    train_question_batches = [
        b for b in MakeBatches(
            train_questions, args.batch_size, fillvalue=train_questions[-1])
    ]
    train_answer_batches = [
        b for b in MakeBatches(train_answers['toks'],
                               args.batch_size,
                               fillvalue=train_answers['toks'][-1])
    ]
    train_image_batches = [
        b for b in MakeBatches(
            train_image_ids, args.batch_size, fillvalue=train_image_ids[-1])
    ]
    train_indices = list(range(len(train_question_batches)))

    # validation batches
    dev_question_batches = [
        b for b in MakeBatches(
            dev_questions, args.batch_size, fillvalue=dev_questions[-1])
    ]
    dev_answer_batches = [
        b for b in MakeBatches(dev_answers['labs'],
                               args.batch_size,
                               fillvalue=dev_answers['labs'][-1])
    ]
    dev_choice_batches = [
        b for b in MakeBatches(
            dev_choices, args.batch_size, fillvalue=dev_choices[-1])
    ]
    dev_image_batches = [
        b for b in MakeBatches(
            dev_image_ids, args.batch_size, fillvalue=dev_image_ids[-1])
    ]

    print('Finished making batches.')
    print('Time: %f s' % (time.time() - start_time))

    ######################
    #      Training      #
    ######################

    acc_file = open(args.dev_accuracy_path, 'w')
    dev_accs = []
    max_acc = -1
    max_acc_epoch = -1

    # define interrupt handler
    def PrintDevAcc():
        print('Max validation accuracy epoch: %i' % max_acc_epoch)
        print(dev_accs)

    def InterruptHandler(sig, frame):
        print(str(sig))
        PrintDevAcc()
        sys.exit(-1)

    signal.signal(signal.SIGINT, InterruptHandler)
    signal.signal(signal.SIGTERM, InterruptHandler)

    # print training information
    print('-' * 80)
    print('Training Information')
    print('# of MLP hidden units: %i' % args.mlp_hidden_units)
    print('# of MLP hidden layers: %i' % args.mlp_hidden_layers)
    print('MLP activation function: %s' % args.mlp_activation)
    print('# of training epochs: %i' % args.num_epochs)
    print('Batch size: %i' % args.batch_size)
    print('Learning rate: %f' % args.learning_rate)
    print('# of train questions: %i' % len(train_questions))
    print('# of dev questions: %i' % len(dev_questions))
    print('-' * 80)
    acc_file.write('-' * 80 + '\n')
    acc_file.write('Training Information\n')
    acc_file.write('# of MLP hidden units: %i\n' % args.mlp_hidden_units)
    acc_file.write('# of MLP hidden layers: %i\n' % args.mlp_hidden_layers)
    acc_file.write('MLP activation function: %s\n' % args.mlp_activation)
    acc_file.write('# of training epochs: %i\n' % args.num_epochs)
    acc_file.write('Batch size: %i\n' % args.batch_size)
    acc_file.write('Learning rate: %f\n' % args.learning_rate)
    acc_file.write('# of train questions: %i\n' % len(train_questions))
    acc_file.write('# of dev questions: %i\n' % len(dev_questions))
    acc_file.write('-' * 80 + '\n')
    acc_file.close()

    # start training
    print('Training started...')
    for k in range(args.num_epochs):
        print('-' * 80)
        print('Epoch %i' % (k + 1))
        progbar = generic_utils.Progbar(len(train_indices) * args.batch_size)
        # shuffle batch indices
        random.shuffle(train_indices)
        for i in train_indices:
            X_question_batch = GetQuestionsTensor(train_question_batches[i],
                                                  word_embedding, word_map)
            #X_image_batch = GetImagesMatrix(train_image_batches[i], img_map, VGG_features)
            X_caption_batch = GetCaptionsTensor2(train_image_batches[i],
                                                 word_embedding, word_map,
                                                 caption_map)
            Y_answer_batch = GetAnswersMatrix(train_answer_batches[i],
                                              word_embedding, word_map)
            loss = model.train_on_batch({
                'question': X_question_batch,
                'image': X_caption_batch,
                'output': Y_answer_batch
            })
            loss = loss[0].tolist()
            progbar.add(args.batch_size, values=[('train loss', loss)])
        print('Time: %f s' % (time.time() - start_time))

        # evaluate on dev set
        pbar = generic_utils.Progbar(
            len(dev_question_batches) * args.batch_size)

        dev_correct = 0

        # feed forward
        for i in range(len(dev_question_batches)):
            X_question_batch = GetQuestionsTensor(dev_question_batches[i],
                                                  word_embedding, word_map)
            #X_image_batch = GetImagesMatrix(dev_image_batches[i], img_map, VGG_features)
            X_caption_batch = GetCaptionsTensor2(dev_image_batches[i],
                                                 word_embedding, word_map,
                                                 caption_map)
            prob = model.predict_on_batch({
                'question': X_question_batch,
                'image': X_caption_batch
            })
            prob = prob[0]

            # get word vecs of choices
            choice_feats = GetChoicesTensor(dev_choice_batches[i],
                                            word_embedding, word_map)
            similarity = np.zeros((5, args.batch_size), float)
            # calculate cosine distances
            for j in range(5):
                similarity[j] = np.diag(
                    cosine_similarity(prob, choice_feats[j]))
            # take argmax of cosine distances
            pred = np.argmax(similarity, axis=0) + 1

            if i != (len(dev_question_batches) - 1):
                dev_correct += np.count_nonzero(dev_answer_batches[i] == pred)
            else:
                num_padding = args.batch_size * len(
                    dev_question_batches) - len(dev_questions)
                last_idx = args.batch_size - num_padding
                dev_correct += np.count_nonzero(
                    dev_answer_batches[:last_idx] == pred[:last_idx])
            pbar.add(args.batch_size)

        dev_acc = float(dev_correct) / len(dev_questions)
        dev_accs.append(dev_acc)
        with open(args.dev_accuracy_path, 'a') as acc_file:
            acc_file.write('%f\n' % dev_acc)
        print('Validation Accuracy: %f' % dev_acc)
        print('Time: %f s' % (time.time() - start_time))

        if dev_acc > max_acc:
            max_acc = dev_acc
            max_acc_epoch = k
            model.save_weights(model_filename + '_best.hdf5', overwrite=True)

    #model.save_weights(model_filename + '_epoch_{:03d}.hdf5'.format(k+1))
    acc_file = open(args.dev_accuracy_path, 'a')
    print(dev_accs)
    print('Best validation accuracy: %f; epoch#%i' % (max_acc,
                                                      (max_acc_epoch + 1)))
    acc_file.write('Best validation accuracy: %f; epoch#%i\n' %
                   (max_acc, (max_acc_epoch + 1)))
    print('Training finished.')
    acc_file.write('Training finished.\n')
    print('Time: %f s' % (time.time() - start_time))
    acc_file.write('Time: %f s\n' % (time.time() - start_time))
    acc_file.close()