Code example #1
0
File: train.py  Project: HaydenFaulkner/phd
def train(model_id, model_path, data_paths_path, feature_path, nb_epoch, batch_size, load_epoch):

    start_time = time.clock()
    t_la = [[], []]
    t_l = [[], []]
    t_a = [[], []]
    v_l = [[], []]
    v_a = [[], []]
    fig = None

    model = models.get_model_from_id(model_id)
    if model is None:
        return

    model_path = model_path + model_id

    # Load log
    if not os.path.exists(model_path):
        os.makedirs(model_path)
    log = open(model_path + '/log.txt', "a")
    log.write('\n\n\nTraining initialised: {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now()))

    if load_epoch == 0:
        print 'Training model from scratch...'
        log.write('\nTraining model from scratch...')
    else:

        if load_epoch < 0 or load_epoch is None:  # get latest
            for i in range(100, -1, -1):
                if os.path.isfile(model_path + '/' + model_id + '_' + str(i) + '.h5'):
                    load_epoch = i
                    break

            if load_epoch is None:
                load_epoch = 0

    if load_epoch == 0:
        log.write('\nTraining model from scratch...')
    else:
        print 'Loading past model to train from:'
        print model_path + '/' + model_id + '_' + str(load_epoch) + '.h5'
        log.write('\nLoading past model to train from:')
        log.write('\n' + model_path + '/' + model_id + '_' + str(load_epoch) + '.h5')
        [t_l, v_l, v_a] = np.load(model_path + '/training_stats_' + str(load_epoch) + '.npy')
        model.load_weights(model_path + '/' + model_id + '_' + str(load_epoch) + '.h5')

    model = models.compile_model(model_id, model)

    for e in range(load_epoch + 1, nb_epoch+1):
        print(
            "--------------------------------------------\nepoch %d\n--------------------------------------------" % e)
        log.write(
            "\n--------------------------------------------\nepoch %d\n--------------------------------------------" % e)
        # get data

        with open(data_paths_path + 'train_paths_equalised.txt') as f:
            all_paths = f.readlines()
        random.shuffle(all_paths)  # randomise order every epoch!!
        all_paths = [line.split() for line in all_paths]  # split so x and y split

        X_batch = []
        Y_batch = []
        sum_loss = 0
        past = 0
        count = 0
        inner_count = 0
        start_time_inner = time.clock()
        for path in all_paths:
            count += 1

            x, y = models.load_input(model_id, feature_path, path)
            X_batch.append(x)
            Y_batch.append(y)

            if (count % batch_size == 0) or (count == len(all_paths)):
                # print 'B'
                if count == len(all_paths):
                    inner_count + 1

                Y_batch = np.squeeze(Y_batch)
                loss, acc = model.train_on_batch(X_batch, Y_batch)
                sum_loss += loss
                inner_count += 1

                # clear batch
                X_batch = []
                Y_batch = []
                if (int((float(count) / len(all_paths)) * 100) > past) or (count == len(all_paths)):

                    tr = (len(all_paths) - count) / ((count) / (time.clock() - start_time_inner))
                    trt = ((nb_epoch - e + 1) * len(all_paths) - count) / (
                        ((e - 1) * len(all_paths) + count) / (time.clock() - start_time))
                    print '(%d) [%.5f] Image: %d / %d; Epoch TR: %02d:%02d:%02d; Total TR: %02d:%02d:%02d;' % (
                        past, sum_loss / inner_count, count, len(all_paths), int((tr / 60) / 60), int((tr / 60) % 60),
                        int(tr % 60),
                        int((trt / 60) / 60), int((trt / 60) % 60), int(trt % 60))

                    log.close()
                    log = open(model_path + '/log.txt', "a")
                    log.write('\n(%d) [%.5f] Image: %d / %d; Epoch TR: %02d:%02d:%02d; Total TR: %02d:%02d:%02d;' % (
                        past, sum_loss / inner_count, count, len(all_paths), int((tr / 60) / 60), int((tr / 60) % 60),
                        int(tr % 60),
                        int((trt / 60) / 60), int((trt / 60) % 60), int(trt % 60)))

                    t_l[0].append((e - 1) + past * .01)
                    t_l[1].append(sum_loss / inner_count)

                    # graph it
                    if fig:
                        plt.close()
                    fig, ax1 = plt.subplots()
                    ax1.plot(t_l[0], t_l[1], 'g-')
                    ax1.plot(v_l[0], v_l[1], 'b-')
                    ax1.set_ylim(bottom=0)
                    ax2 = ax1.twinx()
                    ax2.plot(v_a[0], v_a[1], 'r-')
                    ax2.set_ylim(top=1)


                    # plt.plot(t_l[0], t_l[1])
                    # plt.plot(v_l[0],v_l[1])
                    # plt.plot(v_a[0],v_a[1])
                    # plt.show(block=False)

                    past += 10
                    sum_loss = 0
                    inner_count = 0
                    # if past > 0:
                    #     break


        print '--------------------------------------------'
        print 'Validation results:'
        log.write('\n--------------------------------------------')
        log.write('\nValidation results:\n')
        with open(data_paths_path + 'val_paths_equalised.txt') as f:
            all_val_paths = f.readlines()
        random.shuffle(all_val_paths)  # randomise order every epoch!!
        all_val_paths = [line.split() for line in all_val_paths]  # split so x and y split
        X_val = []
        Y_val = []
        count = 0
        past = 0
        val_metrics = []

        for path in all_val_paths:
            count += 1

            x, y = models.load_input(model_id, feature_path, path)
            X_val.append(x)
            Y_val.append(y)

            if (count % batch_size == 0) or (count == len(all_paths)):
                # test
                Y_val = np.squeeze(Y_val)
                val_metrics.append(model.test_on_batch(X_val, Y_val))

                # clear batch
                X_val = []
                Y_val = []
                if int((float(count) / len(all_val_paths)) * 100) > past:
                    print('.'),
                    log.write('.')
                    past += 10

        print '\n'
        val_results = np.average(val_metrics, axis=0)
        print val_results
        log.write('\n' + str(val_results))
        v_l[0].append(e)
        v_l[1].append(val_results[0])
        v_a[0].append(e)
        v_a[1].append(val_results[1])

        if e % 1 == 0:

            if not os.path.exists(model_path):
                os.makedirs(model_path)
            model.save_weights(model_path + '/' + model_id + '_' + str(e) + '.h5', overwrite=True)
            # fig.savefig(model_path + '/training.png')
            fig.savefig(model_path + '/training.pdf')
            np.save(model_path + '/training_stats_' + str(e) + '.npy', [t_l, v_l, v_a])

            tt = time.clock() - start_time
            print 'Total Time Taken: %02d:%02d:%02d;' % (int((tt / 60) / 60), int((tt / 60) % 60), int(tt % 60))

            log.write('\n\nTotal Time Taken: %02d:%02d:%02d;' % (int((tt / 60) / 60), int((tt / 60) % 60), int(tt % 60)))

    return model
Code example #2
0
File: predict.py  Project: HaydenFaulkner/phd
def predict(model_id, model_path, data_paths_path, feature_path, split, batch_size=None, load_epoch=None, layers=['pred'], save_path=None, equalised=False):

    start_time = time.clock()
    output_classes = 7
    model = models.get_model_from_id(model_id)
    if model is None:
        return

    # Load log
    model_path = model_path + model_id
    if not os.path.exists(model_path):
        os.makedirs(model_path)

    if load_epoch is not None:
        print 'Loading model: ' + model_path + '/' + model_id + '_' + str(load_epoch) + '.h5'
        model.load_weights(model_path + '/' + model_id + '_' + str(load_epoch) + '.h5')
    else:
        print 'ERROR: Need load_epoch number to load'
        return

    # model = models.compile_model(model_id, model) # dont need to compile on prediction

    # get data
    if equalised:
        with open(data_paths_path + split + '_paths_equalised.txt') as f:
            all_paths = f.readlines()
    else:
        with open(data_paths_path + split + '_paths.txt') as f:
            all_paths = f.readlines()

    # all_paths = all_paths[:500]

    all_paths = [line.split() for line in all_paths]  # split so x and y split


    for layer_name in layers:
        # model, output_classes = models.get_model_from_id(model_id)
        model = Model(input=model.input, output=model.get_layer(layer_name).output)

        X_batch = []
        Y_batch = []
        Y_gt = None
        Y_pred = None
        past = 0
        count = 0
        inner_count = 0
        for path in all_paths:
            # print path
            count += 1
            cor_path = DRIVE + path[0][path[0].find('/DATASETS/')+1:]
            if path[0] != cor_path:
                # print 'Paths in .txt files seem incorrect'
                # print 'Changed from: '+path[0]
                # print 'Changed to: '+ cor_path
                path[0] = cor_path
            x, y = models.load_input(model_id, feature_path, path)

            X_batch.append(x)
            Y_batch.append(y)

            if batch_size is not None:

                if (count % batch_size == 0) or (count == len(all_paths)):
                    # Y_batch = np.eye(output_classes)[Y_batch]

                    # train
                    if Y_gt is None:
                        Y_pred = model.predict_on_batch(np.array(X_batch))
                        Y_gt = Y_batch
                    else:
                        Y_pred = np.append(Y_pred, model.predict_on_batch(np.array(X_batch)),axis=0)
                        Y_gt = np.append(Y_gt,Y_batch,axis=0)
                    inner_count += 1

                    # clear batches
                    X_batch, Y_batch = [], []

            else:
                Y_pred = model.predict(x)
                Y_gt = np.eye(output_classes)[y]

            if int((float(count) / len(all_paths)) * 100) > past:

                tr = (len(all_paths) - count) / ((count) / (time.clock() - start_time))
                print '(%d) Image: %d / %d; TR: %02d:%02d:%02d;' % (past, count, len(all_paths), int((tr / 60) / 60),
                                                                    int((tr / 60) % 60), int(tr % 60))

                past += 5

        # if batch_size is None:
        #     Y_pred = model.predict_proba(X_batch, batch_size=32)

        # save predictions to file
        all_array = {}
        if save_path is not None:
            print '\nSaving ....'
            save_path += model_id + '_' + str(load_epoch)+'/'
            print save_path
            for p in range(len(Y_gt)):
                path = all_paths[p]
                # write out to npy files
                image_name = path[0].split('/')[len(path[0].split('/')) - 1]

                if not os.path.exists(save_path + layer_name + '/npy/ind/'):
                    os.makedirs(save_path + layer_name + '/npy/ind/')

                np.save(save_path + layer_name + '/npy/ind/' + image_name[:-4] + '.npy', np.squeeze(Y_pred[p]))
                all_array[image_name[:-4]] = [Y_gt[p], Y_pred[p]]

            # np.save(save_path + layer_name + '/npy/'+split+'.npy', all_array)

    return Y_gt, Y_pred