Exemple #1
0
def get_training_stats(model_id, model, one_hot_words, one_hot_dictionary):
    """Evaluate ``model`` on the training sentences and score its predictions.

    The training sentence file ``sents_train_lc_nopunc.txt`` is read from the
    module-level ``sentence_path`` and the visual features are loaded from the
    module-level ``feature_path`` (neither is a parameter of this function).
    Each line of the sentence file is expected to be ``<id>\\t<sentence>``.

    Args:
        model_id: Identifier passed to ``rnn_models`` to look up the visual
            model/layer and the maximum feature/sentence lengths.
        model: Model exposing ``evaluate`` and ``predict_classes``.
        one_hot_words: Mapping from a word (including ``'<eos>'``) to its
            encoded target value.
        one_hot_dictionary: Mapping from a predicted class back to its word.

    Returns:
        Tuple ``(loss, meteor)`` where ``loss`` is the value returned by
        ``model.evaluate`` and ``meteor`` is ``score(ref, hypo)["METEOR"]``.
    """
    vis_model_id, vis_layer_id, _ = rnn_models.get_vis_model_from_id(model_id)
    max_feat_len, max_sent_len = rnn_models.get_max_lengths(model_id)

    with open(sentence_path + 'sents_train_lc_nopunc.txt') as f:
        lines = f.readlines()
    random.shuffle(lines)

    feature_dir = feature_path + vis_model_id + '/' + vis_layer_id + '/npy/'

    ref = {}
    IDS = []
    X = []
    Y = []
    for line in lines:
        # Parse the line once instead of re-splitting it for every field.
        fields = line.rstrip().split('\t')
        sample_id = fields[0]
        sentence = fields[1]

        IDS.append(sample_id)
        ref[sample_id] = [sentence, sentence]  # need at least two ref for bleu

        X.append(np.load(feature_dir + sample_id + '.npy'))

        Y_sample = [one_hot_words[word] for word in sentence.split()]
        Y_sample.append(one_hot_words['<eos>'])
        Y.append(Y_sample)

    # Pad exactly as the training loop does: features are left-padded and
    # target sentences right-padded. The previous default ('pre') for Y
    # shifted the targets relative to what the model was trained on.
    X = pad_sequences(X, maxlen=max_feat_len, padding='pre')
    Y = pad_sequences(Y, maxlen=max_sent_len, padding='post')

    loss = model.evaluate(X, Y, batch_size=64, verbose=1)
    preds = model.predict_classes(X, batch_size=64, verbose=1)
    print(loss)

    hypo = {}
    for sample_id, pred in zip(IDS, preds):
        words = []
        for class_id in pred:
            token = one_hot_dictionary[class_id]
            if token == '<eos>':
                break
            words.append(token)
        # Every word is followed by a single space (trailing space kept).
        hypo[sample_id] = [''.join(word + ' ' for word in words)]

    return loss, score(ref, hypo)["METEOR"]
Exemple #2
0
def _format_hms(seconds):
    """Split a duration in seconds into integer (hours, minutes, seconds)."""
    return int((seconds / 60) / 60), int((seconds / 60) % 60), int(seconds % 60)


def _scan_feature_stats(lines, feature_dir):
    """Return (min length, min value, max value) over the feature files named by ``lines``."""
    min_len = 1000000
    min_val = 1000000
    max_val = -1
    for line in lines:
        feats = np.load(feature_dir + line.rstrip().split('\t')[0] + '.npy')
        min_len = min(min_len, np.shape(feats)[0])
        min_val = min(min_val, np.min(feats))
        max_val = max(max_val, np.max(feats))
    return min_len, min_val, max_val


def _synthetic_features(model_id, min_len, max_len, feat_size, min_val, max_val):
    """Build a stand-in feature matrix for an extra (text-only) sentence.

    The digit at ``model_id[8:9]`` selects the mode: 1 gives uniform random
    values scaled to ``[min_val, max_val)``, 2 gives all zeros.
    """
    mode = int(model_id[8:9])
    if mode == 1:
        # Random X in the same value range as the real features.
        return np.random.rand(randint(min_len, max_len - 1), feat_size) * (max_val - min_val) + min_val
    if mode == 2:
        return np.zeros((randint(min_len, max_len - 1), feat_size))
    raise ValueError(
        'model_id %r selects unsupported extra-feature mode %d (expected 1 or 2)' % (model_id, mode))


def _train_pass(model, lines, make_features, one_hot_words, batch_size,
                max_feat_len, max_sent_len, log, e, nb_epoch, start_time):
    """Run one pass of ``train_on_batch`` over ``lines`` with progress output.

    ``make_features(line)`` supplies the input features for a line. Returns
    the loss of the last trained batch, or ``None`` when ``lines`` is empty.
    """
    total = len(lines)
    X_batch = []
    Y_batch = []
    past = 0
    inner_count = 0
    sum_loss = 0
    loss = None
    pass_start = time.time()
    for count, line in enumerate(lines, 1):
        X_batch.append(make_features(line))
        Y_sample = [one_hot_words[word] for word in line.rstrip().split('\t')[1].split()]
        Y_sample.append(one_hot_words['<eos>'])
        Y_batch.append(Y_sample)

        # Train once a full batch is collected, or on the final partial batch.
        if count % batch_size != 0 and count != total:
            continue

        loss = model.train_on_batch(
            pad_sequences(X_batch, maxlen=max_feat_len, padding='pre'),
            pad_sequences(Y_batch, maxlen=max_sent_len, padding='post'))
        inner_count += 1
        sum_loss += loss
        X_batch = []
        Y_batch = []

        if int((float(count) / total) * 100) > past or count == total:
            now = time.time()
            # remaining * elapsed / done: same estimate as remaining / rate,
            # but it cannot divide by zero when no time has elapsed yet.
            tr = (total - count) * (now - pass_start) / float(count)
            trt = (((nb_epoch - e + 1) * total - count) * (now - start_time)
                   / float((e - 1) * total + count))
            msg = '(%d) [%.5f] Image: %d / %d; Epoch TR: %02d:%02d:%02d; Total TR: %02d:%02d:%02d;' % (
                (past, sum_loss / inner_count, count, total) + _format_hms(tr) + _format_hms(trt))
            print(msg)
            log.write('\n' + msg)

            past += 10
            sum_loss = 0
            inner_count = 0
    return loss


def train(model_id, sentence_path, feature_path, nb_epoch, batch_size, model_path, load_epoch, extra_path=None):
    """Train the sentence model, logging progress and saving checkpoints.

    Args:
        model_id: Model identifier used to build/compile the model via
            ``rnn_models`` and to name checkpoint files. The digit at
            ``model_id[8:9]`` selects how features for extra sentences are
            synthesised (1 = random, 2 = zeros).
        sentence_path: Directory prefix containing ``sents_train_lc_nopunc.txt``
            (concatenated directly, so it must end with a separator).
        feature_path: Directory prefix of the ``.npy`` feature files.
        nb_epoch: Number of epochs to run (the counter always starts at 1).
        batch_size: Number of samples per ``train_on_batch`` call.
        model_path: Directory prefix; ``model_id`` is appended to form the
            output directory for the log, checkpoints, plots and results.
        load_epoch: 0 trains from scratch; a positive value loads that
            checkpoint; ``None`` or a negative value loads the latest
            checkpoint among epochs 0..100, or trains from scratch if none
            exists.
        extra_path: Optional file of extra ``<id>\\t<sentence>`` lines; up to
            1000 shuffled lines are trained on first in each epoch, paired
            with synthetic features.

    Returns:
        The trained model, holding the weights of the final epoch.

    Raises:
        ValueError: If ``extra_path`` is given and ``model_id[8:9]`` is not
            ``1`` or ``2``.
    """
    t_la = [[], []]
    t_l = [[], []]
    t_a = [[], []]
    v_l = [[], []]
    v_a = [[], []]

    # load vocab
    vocab_size, one_hot_words, one_hot_dictionary = rnn_utils.get_vocab(sentence_path, extra_path)

    # load model
    model = rnn_models.get_model_from_id(model_id, vocab_size)  # maybe fix so 128 not hardcoded
    model_path = model_path + model_id

    if not os.path.exists(model_path):
        os.makedirs(model_path)
    log = open(model_path + '/log.txt', "a")
    try:
        log.write('\n\n\nTraining initialised: {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now()))

        # Test for None first: ``None < 0`` raises TypeError on Python 3.
        if load_epoch is None or load_epoch < 0:  # get latest
            load_epoch = 0  # fall back to training from scratch if nothing is found
            for i in range(100, -1, -1):
                if os.path.isfile(model_path + '/' + model_id + '_' + str(i) + '.h5'):
                    load_epoch = i
                    break

        if load_epoch == 0:
            print('\nTraining model from scratch...')
        else:
            # Reached for an explicit positive load_epoch as well as for a
            # discovered latest checkpoint.
            weights_file = model_path + '/' + model_id + '_' + str(load_epoch) + '.h5'
            print('\nLoading past model to train from:')
            print('\n' + weights_file)
            # NOTE(review): newer numpy versions may require allow_pickle=True here - confirm.
            [t_la, t_l, t_a, v_l, v_a] = np.load(model_path + '/training_stats_' + str(load_epoch) + '.npy')
            model.load_weights(weights_file)
        model = rnn_models.compile_model(model_id, model)

        vis_model_id, vis_layer_id, fs = rnn_models.get_vis_model_from_id(model_id)

        max_feat_len, max_sent_len = rnn_models.get_max_lengths(model_id)

        feature_dir = feature_path + vis_model_id + '/' + vis_layer_id + '/npy/'

        def load_features(line):
            return np.load(feature_dir + line.rstrip().split('\t')[0] + '.npy')

        # TRAIN LOOP
        # Wall-clock time: time.clock() was removed in Python 3.8 and measured
        # CPU time on Unix, which skews remaining-time estimates.
        start_time = time.time()
        fig = None
        min_feat_len = None
        min_feat_val = None
        max_feat_val = -1
        best_weights = None
        best_a = -1
        best_e = None
        # NOTE(review): the epoch counter restarts at 1 even after loading a
        # checkpoint, so resumed runs reuse checkpoint file names - confirm intent.
        for e in range(1, nb_epoch + 1):
            banner = "--------------------------------------------\nepoch %d\n--------------------------------------------" % e
            print(banner)
            log.write("\n" + banner)

            # get data
            with open(sentence_path + 'sents_train_lc_nopunc.txt') as f:
                lines = f.readlines()  # read the lines into an array
            random.shuffle(lines)

            if extra_path is not None:
                if max_feat_val < 0:
                    # Gather real-feature statistics so synthetic features
                    # can mimic their length and value range.
                    min_feat_len, min_feat_val, max_feat_val = _scan_feature_stats(lines, feature_dir)

                with open(extra_path) as f:
                    extra_lines = f.readlines()  # read the lines into an array
                random.shuffle(extra_lines)

                extra_lines = extra_lines[:1000]

                log.write('EXTRA LINES INCLUDED: '+extra_path+'\n')
                print('EXTRA LINES INCLUDED: '+extra_path)
                print('Will train on these first')
                _train_pass(
                    model, extra_lines,
                    lambda _line: _synthetic_features(
                        model_id, min_feat_len, max_feat_len, fs, min_feat_val, max_feat_val),
                    one_hot_words, batch_size, max_feat_len, max_sent_len,
                    log, e, nb_epoch, start_time)
                log.write('\n--------------------------------------------')
                print('\n--------------------------------------------')

            loss = _train_pass(
                model, lines, load_features, one_hot_words, batch_size,
                max_feat_len, max_sent_len, log, e, nb_epoch, start_time)

            t_la[0].append(e)
            t_la[1].append(loss)

            tr = (nb_epoch - e) * (time.time() - start_time) / float(e)
            print('TR: %02d:%02d:%02d;' % _format_hms(tr))

            ####################################################################
            if e % 10 == 0:
                l, a = get_training_stats(model_id, model, one_hot_words, one_hot_dictionary)

                t_l[0].append(e)
                t_l[1].append(l)
                t_a[0].append(e)
                t_a[1].append(a)

                l, a = get_val_stats(model_id, model, one_hot_words, one_hot_dictionary)

                v_l[0].append(e)
                v_l[1].append(l)
                v_a[0].append(e)
                v_a[1].append(a)
                log.write('\n -- Val: METEOR --')
                log.write('\n' + str(a))
                log.write('\n -----------------')

                if a > best_a:
                    best_a = a
                    best_e = e
                    # Snapshot the weights: keeping a reference to ``model``
                    # would just track the weights of the final epoch.
                    best_weights = model.get_weights()

                # graph it
                if fig:
                    plt.close()
                fig, ax1 = plt.subplots()
                ax1.plot(t_la[0], t_la[1], 'g-')
                ax2 = ax1.twinx()
                ax2.plot(t_a[0], t_a[1], 'b--')
                ax2.plot(v_a[0], v_a[1], 'r--')
                plt.show(block=False)

            if e % 20 == 0:
                if not os.path.exists(model_path):
                    os.makedirs(model_path)
                model.save_weights(model_path + '/' + model_id + '_' + str(e) + '.h5', overwrite=True)
                fig.savefig(model_path + '/training.png')
                fig.savefig(model_path + '/training.pdf')
                np.save(model_path + '/training_stats_' + str(e) + '.npy', [t_la, t_l, t_a, v_l, v_a])

                results_path = model_path + '/RESULTS/'
                if not os.path.exists(results_path):
                    os.makedirs(results_path)
                for split in ['train', 'val', 'test']:
                    print(split)
                    split_loss, scores, ref, hypo = rnn_test.test(
                        split, model_id, model, sentence_path, feature_path, one_hot_words, one_hot_dictionary)
                    parts = ["%f\n%s\n" % (split_loss, str(scores))]
                    for k in ref:
                        parts.append('________________________\n')
                        parts.append(k + '\n')
                        parts.append(ref[k][0] + '\n')
                        parts.append(hypo[k][0] + '\n')
                    output = ''.join(parts)

                    with open(results_path + split + '_' + str(e) + '.txt', 'w') as f:
                        f.write(output)

                    if e == nb_epoch:
                        print(output)

        # save best
        log.write('\nBest Epoch: '+str(best_e)+' with val METEOR of '+str(best_a))
        if best_weights is not None:
            # Swap the best weights in just long enough to write them out,
            # then restore the final-epoch weights for the returned model.
            final_weights = model.get_weights()
            model.set_weights(best_weights)
            model.save_weights(model_path + '/' + model_id + '_BEST_' + str(best_e) + '.h5', overwrite=True)
            model.set_weights(final_weights)
    finally:
        # Close the log even when training aborts with an exception.
        log.close()
    return model