Python circular_listの例

プログラミング言語: Python

名前空間/パッケージ名: utils.data_structures

メソッド/関数: circular_list

hotexamples.comのコード掲載数: 36

Python circular_list - 36件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのutils.data_structures.circular_listの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

コード例 #1

ファイルを表示

ファイル: unimodal_with_val.py プロジェクト: behtak/ip-avsr

def main():
    """Train a pretrained-encoder + LSTM classifier with early stopping.

    Settings are read from the config file named by the ``config`` CLI
    option; for the model/training keys handled below, a CLI option of the
    same name takes precedence over the config value.  The function loads
    the .mat dataset, splits it into train/validation/test sets by subject
    id, builds the network through
    ``deltanet_majority_vote.create_model_using_pretrained_encoder``, trains
    it with Adam and evaluates on the validation batch after every epoch.
    The parameters of the epoch with the lowest validation cost are kept and,
    if the ``save_best`` option is present, saved at the end.

    Side effects: prints progress to stdout, plots the confusion matrix and
    the cost curves, and, if the ``write_results`` option is present, appends
    one result line to that file.
    """
    configure_theano()
    options = parse_options()
    config_file = options['config']
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    print('CLI options: {}'.format(options.items()))

    print('Reading Config File: {}...'.format(config_file))
    print(config.items('data'))
    print(config.items('models'))
    print(config.items('training'))

    print('preprocessing dataset...')
    data = load_mat_file(config.get('data', 'images'))
    ae_pretrained = config.get('models', 'pretrained')
    input_dimension = config.getint('models', 'input_dimension')
    output_classes = config.getint('models', 'output_classes')
    lstm_size = config.getint('models', 'lstm_size')
    nonlinearity = options['nonlinearity'] if 'nonlinearity' in options else config.get('models', 'nonlinearity')

    # Map the configured name onto the activation function; any other value
    # is passed through unchanged.
    if nonlinearity == 'sigmoid':
        nonlinearity = sigmoid
    elif nonlinearity == 'rectify':
        nonlinearity = rectify

    # capture training parameters
    # Every numeric CLI override is cast explicitly so that a string value
    # behaves the same as the typed value read from the config file.
    validation_window = int(options['validation_window']) \
        if 'validation_window' in options else config.getint('training', 'validation_window')
    no_epochs = int(options['no_epochs']) if 'no_epochs' in options else config.getint('training', 'no_epochs')
    weight_init = options['weight_init'] if 'weight_init' in options else config.get('training', 'weight_init')
    learning_rate = float(options['learning_rate']) if 'learning_rate' in options \
        else config.getfloat('training', 'learning_rate')
    epochsize = int(options['epochsize']) if 'epochsize' in options else config.getint('training', 'epochsize')
    batchsize = int(options['batchsize']) if 'batchsize' in options else config.getint('training', 'batchsize')
    # NOTE(review): the CLI value is used as-is; if parse_options() yields
    # strings, a value such as 'False' would be truthy here -- confirm.
    use_peepholes = options['use_peepholes'] if 'use_peepholes' in options else config.getboolean('training',
                                                                                                  'use_peepholes')

    if weight_init == 'norm':
        weight_init_fn = las.init.Normal(0.1)
    elif weight_init == 'uniform':
        weight_init_fn = las.init.Uniform()
    elif weight_init == 'ortho':
        weight_init_fn = las.init.Orthogonal()
    else:
        # 'glorot' and any unrecognised name use Glorot-uniform.
        weight_init_fn = las.init.GlorotUniform()

    train_subject_ids = read_data_split_file('data/train.txt')
    val_subject_ids = read_data_split_file('data/val.txt')
    test_subject_ids = read_data_split_file('data/test.txt')

    data_matrix = data['dataMatrix']
    targets_vec = data['targetsVec'].reshape((-1,))
    subjects_vec = data['subjectsVec'].reshape((-1,))
    vidlen_vec = data['videoLengthVec'].reshape((-1,))

    # NOTE(review): mean subtraction is applied here on the full matrix and
    # again on each split further down -- confirm the second pass is intended.
    data_matrix = sequencewise_mean_image_subtraction(data_matrix, vidlen_vec)

    train_X, train_y, train_vidlens, train_subjects, \
    val_X, val_y, val_vidlens, val_subjects, \
    test_X, test_y, test_vidlens, test_subjects = split_seq_data(data_matrix, targets_vec, subjects_vec, vidlen_vec,
                   train_subject_ids, val_subject_ids, test_subject_ids)
    # Shift all labels by +1.  NOTE(review): an earlier revision of this code
    # described this as compensating for a -1 introduced in lstm_gendata --
    # confirm against the batch generator.
    train_y += 1
    val_y += 1
    test_y += 1

    train_X = reorder_data(train_X, (30, 50))
    val_X = reorder_data(val_X, (30, 50))
    test_X = reorder_data(test_X, (30, 50))

    train_X = sequencewise_mean_image_subtraction(train_X, train_vidlens)
    val_X = sequencewise_mean_image_subtraction(val_X, val_vidlens)
    test_X = sequencewise_mean_image_subtraction(test_X, test_vidlens)

    weights, biases = load_dbn(ae_pretrained)

    train_X = normalize_input(train_X, centralize=True)
    val_X = normalize_input(val_X, centralize=True)
    test_X = normalize_input(test_X, centralize=True)

    # IMPT: the encoder was trained with Fortran-ordered images, so to
    # visualize, convert all the images to C order using reshape_images_order()

    # Symbolic inputs of the compiled Theano functions.
    window = T.iscalar('theta')
    inputs = T.tensor3('inputs', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.imatrix('targets')

    print('constructing end to end model...')
    network = deltanet_majority_vote.create_model_using_pretrained_encoder(weights, biases,
                                                                           (None, None, input_dimension), inputs,
                                                                           (None, None), mask, lstm_size, window,
                                                                           output_classes, weight_init_fn,
                                                                           use_peepholes, nonlinearity)

    print_network(network)
    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = temporal_softmax_loss(predictions, targets, mask)
    updates = las.updates.adam(cost, all_params, learning_rate=learning_rate)

    # `train` applies the Adam updates; `compute_train_cost` evaluates the same
    # (non-deterministic) cost expression without updating any parameter.
    train = theano.function(
        [inputs, targets, mask, window],
        cost, updates=updates, allow_input_downcast=True)
    compute_train_cost = theano.function([inputs, targets, mask, window], cost, allow_input_downcast=True)

    # Deterministic forward pass used for the validation cost and predictions.
    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = temporal_softmax_loss(test_predictions, targets, mask)
    compute_test_cost = theano.function(
        [inputs, targets, mask, window], test_cost, allow_input_downcast=True)

    val_fn = theano.function([inputs, mask, window], test_predictions, allow_input_downcast=True)

    # Train for up to `no_epochs` epochs of `epochsize` minibatches each.
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    WINDOW_SIZE = 9
    STRIP_SIZE = 3
    val_window = circular_list(validation_window)
    train_strip = np.zeros((STRIP_SIZE,))
    best_val = float('inf')
    # Bound up front so the final report cannot hit an unbound name when no
    # epoch ever improves the validation cost.
    best_params = None
    val_cr = None
    test_cr = None
    test_conf = None

    datagen = gen_lstm_batch_random(train_X, train_y, train_vidlens, batchsize=batchsize)
    val_datagen = gen_lstm_batch_random(val_X, val_y, val_vidlens, batchsize=len(val_vidlens))
    test_datagen = gen_lstm_batch_random(test_X, test_y, test_vidlens, batchsize=len(test_vidlens))

    # We'll use this "validation set" to periodically check progress
    X_val, y_val, mask_val, _ = next(val_datagen)

    # Use this test set to check final classification performance
    X_test, y_test, mask_test, _ = next(test_datagen)

    # Keep the original targets for evaluate_model2 and build a copy repeated
    # along the last axis of the mask for the cost function.
    y_val_evaluate = y_val
    y_val = y_val.reshape((-1, 1)).repeat(mask_val.shape[-1], axis=-1)

    for epoch in range(no_epochs):
        time_start = time.time()
        for i in range(epochsize):
            X, y, m, _ = next(datagen)
            # repeat targets based on max sequence len
            y = y.reshape((-1, 1))
            y = y.repeat(m.shape[-1], axis=-1)
            print_str = 'Epoch {} batch {}/{}: {} examples using adam at learning rate = {:.4f}'.format(
                epoch + 1, i + 1, epochsize, len(X), learning_rate)
            print(print_str, end='')
            sys.stdout.flush()
            train(X, y, m, WINDOW_SIZE)
            print('\r', end='')
        # The training cost is measured on the last minibatch of the epoch.
        cost = compute_train_cost(X, y, m, WINDOW_SIZE)
        val_cost = compute_test_cost(X_val, y_val, mask_val, WINDOW_SIZE)
        cost_train.append(cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = cost
        val_window.push(val_cost)

        # gl: percentage by which the current validation cost exceeds the
        # lowest one seen so far.  pk compares the sum of the training strip
        # with STRIP_SIZE times its minimum; while the strip still contains
        # its initial zeros the denominator is 0 (for non-negative costs), so
        # pk and pq are not meaningful during the first STRIP_SIZE - 1 epochs.
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        pk = 1000 * (np.sum(train_strip) / (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model2(X_val, y_val_evaluate, mask_val, WINDOW_SIZE, val_fn)
        class_rate.append(cr)

        if val_cost < best_val:
            # New best validation cost: evaluate on the test batch and
            # snapshot the parameters.
            best_val = val_cost
            val_cr = cr
            test_cr, test_conf = evaluate_model2(X_test, y_test, mask_test, WINDOW_SIZE, val_fn)
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f}, Test CR= {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr, test_cr, time.time() - time_start))
            best_params = las.layers.get_all_param_values(network)
        else:
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr, time.time() - time_start))

        if epoch >= validation_window and early_stop2(val_window, best_val, validation_window):
            break

    if best_params is None:
        # No epoch ran, or the validation cost never compared below the
        # initial infinity (e.g. it was NaN): there is no best model.
        print('No epoch improved the validation cost; nothing to report or save.')
        return

    numbers = [str(digit) for digit in range(10)]

    print('Final Model')
    print('test CR: {}, val CR: {}, val loss: {}'.format(test_cr, val_cr, best_val))
    print('confusion matrix: ')
    plot_confusion_matrix(test_conf, numbers, fmt='grid')
    plot_validation_cost(cost_train, cost_val, class_rate)

    if 'write_results' in options:
        with open(options['write_results'], mode='a') as f:
            f.write('{},{},{}\n'.format(test_cr, val_cr, best_val))

    if 'save_best' in options:
        print('Saving the best model so far...')
        # Restore the snapshot taken at the best validation epoch before saving.
        las.layers.set_all_param_values(network, best_params)
        save_model_params(network, options['save_best'])
        print('Model Saved!')

コード例 #2

ファイルを表示

ファイル: unimodal_nodelta_with_val.py プロジェクト: behtak/ip-avsr

def main():
    """Train the baseline end-to-end LSTM model on a finetuned encoder.

    Settings are read from the config file named by the ``config`` CLI
    option.  The function loads the pre-split .mat dataset, unpickles the
    finetuned encoder, builds the network through
    ``baseline_end2end.create_model``, trains it with Adadelta for at most
    ``NUM_EPOCHS`` epochs and evaluates on the validation batch after every
    epoch.  Training stops early once the window of recent validation costs
    is strictly increasing.  The parameters of the epoch with the lowest
    validation cost are kept and, if the ``save_best`` option is present,
    saved at the end.

    Raises:
        ValueError: if ``load_finetune`` is false in the config, because the
            model is built from the finetuned encoder.

    Side effects: prints progress to stdout and plots the confusion matrix
    and the cost curves.
    """
    configure_theano()
    options = parse_options()
    config_file = options['config']
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    print('CLI options: {}'.format(options.items()))

    print('Reading Config File: {}...'.format(config_file))
    print(config.items('data'))
    print(config.items('models'))
    print(config.items('training'))

    print('preprocessing dataset...')
    data = load_mat_file(config.get('data', 'images'))
    ae_finetuned = config.get('models', 'finetuned')
    learning_rate = config.getfloat('training', 'learning_rate')
    decay_rate = config.getfloat('training', 'decay_rate')
    decay_start = config.getint('training', 'decay_start')
    load_finetune = config.getboolean('training', 'load_finetune')
    lstm_units = config.getint('training', 'lstm_units')
    output_units = config.getint('training', 'output_units')

    train_vidlens = data['trVideoLengthVec'].astype('int').reshape((-1,))
    val_vidlens = data['valVideoLengthVec'].astype('int').reshape((-1,))
    test_vidlens = data['testVideoLengthVec'].astype('int').reshape((-1,))
    train_X = data['trData'].astype('float32')
    val_X = data['valData'].astype('float32')
    test_X = data['testData'].astype('float32')
    train_y = data['trTargetsVec'].astype('int').reshape((-1,)) + 1  # +1 to handle the -1 introduced in lstm_gendata
    val_y = data['valTargetsVec'].astype('int').reshape((-1,)) + 1
    test_y = data['testTargetsVec'].astype('int').reshape((-1,)) + 1

    if not load_finetune:
        # The network below is always built from the encoder, so fail here
        # with a clear message instead of a NameError later on.
        raise ValueError("'load_finetune' is disabled in the config, but the model "
                         "can only be built from the finetuned encoder")

    print('loading finetuned encoder: {}...'.format(ae_finetuned))
    print('loading pre-trained encoding layers...')
    # NOTE(security): unpickling executes arbitrary code; only point the
    # 'finetuned' setting at files from a trusted source.
    with open(ae_finetuned, 'rb') as encoder_file:
        dbn = pickle.load(encoder_file)
    dbn.initialize()

    train_X = normalize_input(train_X, centralize=True)
    val_X = normalize_input(val_X, centralize=True)
    test_X = normalize_input(test_X, centralize=True)

    # IMPT: the encoder was trained with Fortran-ordered images, so to
    # visualize, convert all the images to C order using reshape_images_order()

    # Symbolic inputs of the compiled Theano functions.
    inputs = T.tensor3('inputs', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.ivector('targets')
    # The learning rate is a shared variable so it can be decayed in place
    # between epochs.
    lr = theano.shared(np.array(learning_rate, dtype=theano.config.floatX), name='learning_rate')
    lr_decay = np.array(decay_rate, dtype=theano.config.floatX)

    print('constructing end to end model...')
    network = baseline_end2end.create_model(dbn, (None, None, 1500), inputs,
                                            (None, None), mask, lstm_units, output_units)

    print_network(network)
    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = T.mean(las.objectives.categorical_crossentropy(predictions, targets))
    updates = las.updates.adadelta(cost, all_params, learning_rate=lr)

    # `train` applies the Adadelta updates; `compute_train_cost` evaluates the
    # same cost expression without updating any parameter.
    train = theano.function(
        [inputs, targets, mask],
        cost, updates=updates, allow_input_downcast=True)
    compute_train_cost = theano.function([inputs, targets, mask], cost, allow_input_downcast=True)

    # Deterministic forward pass used for the validation cost and predictions.
    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = T.mean(las.objectives.categorical_crossentropy(test_predictions, targets))
    compute_test_cost = theano.function(
        [inputs, targets, mask], test_cost, allow_input_downcast=True)

    val_fn = theano.function([inputs, mask], test_predictions, allow_input_downcast=True)

    # Train for up to NUM_EPOCHS epochs of EPOCH_SIZE minibatches each.
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    NUM_EPOCHS = 30
    EPOCH_SIZE = 45
    BATCH_SIZE = 20
    STRIP_SIZE = 3
    VALIDATION_WINDOW = 4
    val_window = circular_list(VALIDATION_WINDOW)
    train_strip = np.zeros((STRIP_SIZE,))
    best_val = float('inf')
    # Bound up front so the final report cannot hit an unbound name when no
    # epoch ever improves the validation cost.
    best_params = None
    test_cr = None
    test_conf = None

    datagen = gen_lstm_batch_random(train_X, train_y, train_vidlens, batchsize=BATCH_SIZE)
    val_datagen = gen_lstm_batch_random(val_X, val_y, val_vidlens, batchsize=len(val_vidlens))
    test_datagen = gen_lstm_batch_random(test_X, test_y, test_vidlens, batchsize=len(test_vidlens))

    # We'll use this "validation set" to periodically check progress
    X_val, y_val, mask_val, _ = next(val_datagen)

    # Use this test set to check final classification performance
    X_test, y_test, mask_test, _ = next(test_datagen)

    def early_stop(cost_window):
        """Return True when the costs in the window are strictly increasing.

        A window with fewer than two entries never triggers a stop.
        """
        if len(cost_window) < 2:
            return False
        previous = cost_window[0]
        for idx, window_cost in enumerate(cost_window):
            if idx == 0 or previous < window_cost:
                previous = window_cost
            else:
                return False
        return True

    for epoch in range(NUM_EPOCHS):
        time_start = time.time()
        for i in range(EPOCH_SIZE):
            X, y, m, _ = next(datagen)
            print_str = 'Epoch {} batch {}/{}: {} examples at learning rate = {:.4f}'.format(
                epoch + 1, i + 1, EPOCH_SIZE, len(X), float(lr.get_value()))
            print(print_str, end='')
            sys.stdout.flush()
            train(X, y, m)
            print('\r', end='')
        # The training cost is measured on the last minibatch of the epoch.
        cost = compute_train_cost(X, y, m)
        val_cost = compute_test_cost(X_val, y_val, mask_val)
        cost_train.append(cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = cost
        val_window.push(val_cost)

        # gl: percentage by which the current validation cost exceeds the
        # lowest one seen so far.  pk compares the sum of the training strip
        # with STRIP_SIZE times its minimum; while the strip still contains
        # its initial zeros the denominator is 0 (for non-negative costs), so
        # pk and pq are not meaningful during the first STRIP_SIZE - 1 epochs.
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        pk = 1000 * (np.sum(train_strip) / (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model(X_val, y_val, mask_val, val_fn)
        class_rate.append(cr)

        if val_cost < best_val:
            # New best validation cost: evaluate on the test batch and
            # snapshot the parameters.
            best_val = val_cost
            test_cr, test_conf = evaluate_model(X_test, y_test, mask_test, val_fn)
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f}, Test CR= {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr, test_cr, time.time() - time_start))
            best_params = las.layers.get_all_param_values(network)
        else:
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr, time.time() - time_start))

        if epoch >= VALIDATION_WINDOW and early_stop(val_window):
            break

        # learning rate decay
        if epoch + 1 >= decay_start:
            lr.set_value(lr.get_value() * lr_decay)

    if best_params is None:
        # The validation cost never compared below the initial infinity
        # (e.g. it was NaN): there is no best model.
        print('No epoch improved the validation cost; nothing to report or save.')
        return

    numbers = [str(digit) for digit in range(10)]

    print('Final Model')
    print('classification rate: {}, validation loss: {}'.format(test_cr, best_val))
    print('confusion matrix: ')
    plot_confusion_matrix(test_conf, numbers, fmt='grid')
    plot_validation_cost(cost_train, cost_val, class_rate)

    if 'save_best' in options:
        print('Saving the best model so far...')
        # Restore the snapshot taken at the best validation epoch before saving.
        las.layers.set_all_param_values(network, best_params)
        save_model_params(network, options['save_best'])

コード例 #3

ファイルを表示

def main():
    """Train a majority-vote LSTM classifier on one feature stream.

    Settings are read from the config file named by the ``config`` CLI
    option; ``weight_init``, ``use_peepholes``, ``validation_window``,
    ``num_epoch`` and ``learning_rate`` may be overridden from the CLI.  The
    function loads the .mat dataset, optionally normalizes it, appends first
    and second deltas, splits it into train/validation/test sets by subject
    id, builds the network through
    ``lstm_classifier_majority_vote.create_model``, trains it with Adam and
    evaluates on the validation batch after every epoch.  The parameters of
    the epoch with the lowest validation cost are kept and, if the
    ``save_best`` option is present, saved at the end.

    Side effects: prints progress and the confusion matrix to stdout;
    depending on the ``save_plot`` and ``write_results`` options it also
    writes a cost plot, appends the confusion matrix to a text file and
    appends one result line to the results file.
    """
    configure_theano()
    options = parse_options()
    config_file = options['config']
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    print('Reading Config File: {}...'.format(config_file))
    print(config.items('stream1'))
    print(config.items('lstm_classifier'))
    print(config.items('training'))

    print('CLI options: {}'.format(options.items()))

    print('preprocessing dataset...')
    data = load_mat_file(config.get('stream1', 'data'))
    stream1_dim = config.getint('stream1', 'input_dimensions')

    output_classes = config.getint('lstm_classifier', 'output_classes')
    output_classnames = config.get('lstm_classifier',
                                   'output_classnames').split(',')
    matlab_target_offset = config.getboolean('lstm_classifier',
                                             'matlab_target_offset')
    lstm_size = config.getint('lstm_classifier', 'lstm_size')
    weight_init = options[
        'weight_init'] if 'weight_init' in options else config.get(
            'lstm_classifier', 'weight_init')
    # NOTE(review): the CLI value is used as-is; if parse_options() yields
    # strings, a value such as 'False' would be truthy here -- confirm.
    use_peepholes = options[
        'use_peepholes'] if 'use_peepholes' in options else config.getboolean(
            'lstm_classifier', 'use_peepholes')
    windowsize = config.getint('lstm_classifier', 'windowsize')

    # data preprocessing options
    meanremove = config.getboolean('stream1', 'meanremove')
    samplewisenormalize = config.getboolean('stream1', 'samplewisenormalize')
    featurewisenormalize = config.getboolean('stream1', 'featurewisenormalize')

    # capture training parameters
    # Every numeric CLI override is cast explicitly so that a string value
    # behaves the same as the typed value read from the config file.
    validation_window = int(options['validation_window']) \
        if 'validation_window' in options else config.getint('training', 'validation_window')
    num_epoch = int(
        options['num_epoch']) if 'num_epoch' in options else config.getint(
            'training', 'num_epoch')

    learning_rate = float(options['learning_rate']) if 'learning_rate' in options \
        else config.getfloat('training', 'learning_rate')
    epochsize = config.getint('training', 'epochsize')
    batchsize = config.getint('training', 'batchsize')

    if weight_init == 'norm':
        weight_init_fn = las.init.Normal(0.1)
    elif weight_init == 'uniform':
        weight_init_fn = las.init.Uniform()
    elif weight_init == 'ortho':
        weight_init_fn = las.init.Orthogonal()
    else:
        # 'glorot' and any unrecognised name use Glorot-uniform.
        weight_init_fn = las.init.GlorotUniform()

    train_subject_ids = read_data_split_file(
        config.get('training', 'train_subjects_file'))
    val_subject_ids = read_data_split_file(
        config.get('training', 'val_subjects_file'))
    test_subject_ids = read_data_split_file(
        config.get('training', 'test_subjects_file'))

    data_matrix = data['dataMatrix'].astype('float32')
    targets_vec = data['targetsVec'].reshape((-1, ))
    subjects_vec = data['subjectsVec'].reshape((-1, ))
    vidlen_vec = data['videoLengthVec'].reshape((-1, ))

    if samplewisenormalize:
        data_matrix = normalize_input(data_matrix)

    if meanremove:
        data_matrix = sequencewise_mean_image_subtraction(
            data_matrix, vidlen_vec)

    data_matrix = concat_first_second_deltas(data_matrix, vidlen_vec,
                                             windowsize)

    train_dct, train_y, train_vidlens, train_subjects, \
    val_dct, val_y, val_vidlens, val_subjects, \
    test_dct, test_y, test_vidlens, test_subjects = split_seq_data(data_matrix, targets_vec, subjects_vec, vidlen_vec,
                                                                   train_subject_ids, val_subject_ids, test_subject_ids)
    if matlab_target_offset:
        # Shift the labels down by one when the config flags a MATLAB offset.
        train_y -= 1
        val_y -= 1
        test_y -= 1

    # featurewise normalize dct features
    if featurewisenormalize:
        # Statistics come from the training split and are reused for the
        # validation and test splits.
        train_dct, dct_mean, dct_std = featurewise_normalize_sequence(
            train_dct)
        val_dct = (val_dct - dct_mean) / dct_std
        test_dct = (test_dct - dct_mean) / dct_std

    # Symbolic inputs of the compiled Theano functions.
    inputs = T.tensor3('inputs', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.imatrix('targets')

    print('constructing end to end model...')
    # The feature dimension is tripled to match the input after
    # concat_first_second_deltas -- presumably features plus two delta
    # orders; TODO confirm.
    network = lstm_classifier_majority_vote.create_model(
        (None, None, stream1_dim * 3), inputs, (None, None), mask, lstm_size,
        output_classes, weight_init_fn, use_peepholes)

    print_network(network)
    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = temporal_softmax_loss(predictions, targets, mask)
    updates = adam(cost, all_params, learning_rate=learning_rate)

    # `train` applies the Adam updates; `compute_train_cost` evaluates the same
    # cost expression without updating any parameter.
    train = theano.function([inputs, targets, mask],
                            cost,
                            updates=updates,
                            allow_input_downcast=True)
    compute_train_cost = theano.function([inputs, targets, mask],
                                         cost,
                                         allow_input_downcast=True)

    # Deterministic forward pass used for the validation cost and predictions.
    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = temporal_softmax_loss(test_predictions, targets, mask)
    compute_test_cost = theano.function([inputs, targets, mask],
                                        test_cost,
                                        allow_input_downcast=True)

    val_fn = theano.function([inputs, mask],
                             test_predictions,
                             allow_input_downcast=True)

    # Train for up to `num_epoch` epochs of `epochsize` minibatches each.
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    STRIP_SIZE = 3
    val_window = circular_list(validation_window)
    train_strip = np.zeros((STRIP_SIZE, ))
    best_val = float('inf')
    best_cr = 0.0
    # Bound up front so the final report cannot hit an unbound name when no
    # epoch ever improves the validation cost.
    best_params = None
    test_cr = None
    test_conf = None

    datagen = gen_lstm_batch_random(train_dct,
                                    train_y,
                                    train_vidlens,
                                    batchsize=batchsize)
    val_datagen = gen_lstm_batch_random(val_dct,
                                        val_y,
                                        val_vidlens,
                                        batchsize=len(val_vidlens))
    test_datagen = gen_lstm_batch_random(test_dct,
                                         test_y,
                                         test_vidlens,
                                         batchsize=len(test_vidlens))
    integral_lens = compute_integral_len(train_vidlens)

    # We'll use this "validation set" to periodically check progress.  Only
    # the targets, mask and indices of the generated batch are used; the
    # input batch is rebuilt from the indices.
    _, y_val, mask_val, idxs_val = next(val_datagen)
    integral_lens_val = compute_integral_len(val_vidlens)
    dct_val = gen_seq_batch_from_idx(val_dct, idxs_val, val_vidlens,
                                     integral_lens_val, np.max(val_vidlens))

    _, y_test, mask_test, idxs_test = next(test_datagen)
    integral_lens_test = compute_integral_len(test_vidlens)
    dct_test = gen_seq_batch_from_idx(test_dct, idxs_test, test_vidlens,
                                      integral_lens_test, np.max(test_vidlens))

    # Keep the original targets for evaluate_model2 and build a copy repeated
    # along the last axis of the mask for the cost function.
    y_val_evaluate = y_val
    y_val = y_val.reshape((-1, 1)).repeat(mask_val.shape[-1], axis=-1)

    for epoch in range(num_epoch):
        time_start = time.time()
        for i in range(epochsize):
            _, y, m, batch_idxs = next(datagen)
            # repeat targets based on max sequence len
            y = y.reshape((-1, 1))
            y = y.repeat(m.shape[-1], axis=-1)
            d = gen_seq_batch_from_idx(train_dct, batch_idxs, train_vidlens,
                                       integral_lens, np.max(train_vidlens))
            print_str = 'Epoch {} batch {}/{}: {} examples using adam with learning rate = {}'.format(
                epoch + 1, i + 1, epochsize, len(y), learning_rate)
            print(print_str, end='')
            sys.stdout.flush()
            train(d, y, m)
            print('\r', end='')
        # The training cost is measured on the last minibatch of the epoch.
        cost = compute_train_cost(d, y, m)
        val_cost = compute_test_cost(dct_val, y_val, mask_val)
        cost_train.append(cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = cost
        val_window.push(val_cost)

        # gl: percentage by which the current validation cost exceeds the
        # lowest one seen so far.  pk compares the sum of the training strip
        # with STRIP_SIZE times its minimum; while the strip still contains
        # its initial zeros the denominator is 0 (for non-negative costs), so
        # pk and pq are not meaningful during the first STRIP_SIZE - 1 epochs.
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        pk = 1000 * (np.sum(train_strip) /
                     (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model2(dct_val, y_val_evaluate, mask_val,
                                       val_fn)
        class_rate.append(cr)

        if val_cost < best_val:
            # New best validation cost: evaluate on the test batch and
            # snapshot the parameters.
            best_val = val_cost
            best_cr = cr
            test_cr, test_conf = evaluate_model2(dct_test, y_test, mask_test,
                                                 val_fn)
            print(
                "Epoch {} train cost = {}, val cost = {}, "
                "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f}, Test CR= {:.3f} ({:.1f}sec)"
                .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                        test_cr,
                        time.time() - time_start))
            best_params = las.layers.get_all_param_values(network)
        else:
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f} ({:.1f}sec)".
                  format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                         time.time() - time_start))

        if epoch >= validation_window and early_stop2(val_window, best_val,
                                                      validation_window):
            break

    if best_params is None:
        # No epoch ran, or the validation cost never compared below the
        # initial infinity (e.g. it was NaN): there is no best model.
        print('No epoch improved the validation cost; nothing to report or save.')
        return

    print('Final Model')
    print('CR: {}, val loss: {}, Test CR: {}'.format(best_cr, best_val,
                                                     test_cr))

    # plot confusion matrix
    table_str = plot_confusion_matrix(test_conf, output_classnames, fmt='pipe')
    print('confusion matrix: ')
    print(table_str)

    if 'save_plot' in options:
        prefix = options['save_plot']
        plot_validation_cost(cost_train,
                             cost_val,
                             savefilename='{}.validloss.png'.format(prefix))
        with open('{}.confmat.txt'.format(prefix), mode='a') as f:
            f.write(table_str)
            f.write('\n\n')

    if 'write_results' in options:
        print('writing results to {}'.format(options['write_results']))
        results_file = options['write_results']
        with open(results_file, mode='a') as f:
            f.write('{},{},{}\n'.format(test_cr, best_cr, best_val))

    if 'save_best' in options:
        print('saving best model...')
        # Restore the snapshot taken at the best validation epoch before saving.
        las.layers.set_all_param_values(network, best_params)
        save_model_params(network, options['save_best'])
        print('best model saved to {}'.format(options['save_best']))

コード例 #4

ファイルを表示

ファイル: unimodal_with_val.py プロジェクト: befkaduBelete/ip-avsr

def main():
    """Train and evaluate the unimodal LSTM model with early stopping.

    Reads the config file named by the ``config`` CLI option, loads the
    pre-split train/validation/test data from a .mat file, builds the
    network on top of a pretrained encoder and trains it with Adam.
    The test classification rate is re-measured every time the validation
    cost reaches a new minimum, and the parameters of that epoch are kept.

    CLI options, when present, override the matching config-file values.
    If the ``save_best`` option is given, the best parameters are written
    to that path and reloaded from it as a sanity check.
    """
    configure_theano()
    options = parse_options()
    config_file = options['config']
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    print('CLI options: {}'.format(options.items()))

    print('Reading Config File: {}...'.format(config_file))
    print(config.items('data'))
    print(config.items('models'))
    print(config.items('training'))

    print('preprocessing dataset...')
    data = load_mat_file(config.get('data', 'images'))
    ae_pretrained = config.get('models', 'pretrained')
    input_dimension = config.getint('models', 'input_dimension')
    output_classes = config.getint('models', 'output_classes')
    lstm_size = config.getint('models', 'lstm_size')
    nonlinearity = options['nonlinearity'] if 'nonlinearity' in options \
        else config.get('models', 'nonlinearity')

    # Map the configured name onto the activation function; any other value
    # is handed to the model builder unchanged.
    if nonlinearity == 'sigmoid':
        nonlinearity = sigmoid
    if nonlinearity == 'rectify':
        nonlinearity = rectify

    # capture training parameters
    # Every numeric CLI override is cast explicitly so that a value arriving
    # as a string behaves the same as one read through config.getint/getfloat.
    validation_window = int(options['validation_window']) \
        if 'validation_window' in options else config.getint('training', 'validation_window')
    no_epochs = int(options['no_epochs']) if 'no_epochs' in options \
        else config.getint('training', 'no_epochs')
    weight_init = options['weight_init'] if 'weight_init' in options \
        else config.get('training', 'weight_init')
    learning_rate = float(options['learning_rate']) if 'learning_rate' in options \
        else config.getfloat('training', 'learning_rate')
    epochsize = int(options['epochsize']) if 'epochsize' in options \
        else config.getint('training', 'epochsize')
    batchsize = int(options['batchsize']) if 'batchsize' in options \
        else config.getint('training', 'batchsize')
    # NOTE(review): a CLI-supplied use_peepholes is used as-is; if it arrives
    # as a string such as 'False' it would be truthy -- confirm parse_options.
    use_peepholes = options['use_peepholes'] if 'use_peepholes' in options \
        else config.getboolean('training', 'use_peepholes')

    # Glorot-uniform is the fallback for 'glorot' and for unknown names.
    weight_init_fn = las.init.GlorotUniform()
    if weight_init == 'norm':
        weight_init_fn = las.init.Normal(0.1)
    elif weight_init == 'uniform':
        weight_init_fn = las.init.Uniform()
    elif weight_init == 'ortho':
        weight_init_fn = las.init.Orthogonal()

    train_vidlens = data['trVideoLengthVec'].astype('int').reshape((-1, ))
    val_vidlens = data['valVideoLengthVec'].astype('int').reshape((-1, ))
    test_vidlens = data['testVideoLengthVec'].astype('int').reshape((-1, ))
    train_X = data['trData'].astype('float32')
    val_X = data['valData'].astype('float32')
    test_X = data['testData'].astype('float32')
    train_y = data['trTargetsVec'].astype('int').reshape(
        (-1, )) + 1  # +1 to handle the -1 introduced in lstm_gendata
    val_y = data['valTargetsVec'].astype('int').reshape((-1, )) + 1
    test_y = data['testTargetsVec'].astype('int').reshape((-1, )) + 1

    train_X = reorder_data(train_X, (30, 50))
    val_X = reorder_data(val_X, (30, 50))
    test_X = reorder_data(test_X, (30, 50))

    train_X = sequencewise_mean_image_subtraction(train_X, train_vidlens)
    val_X = sequencewise_mean_image_subtraction(val_X, val_vidlens)
    test_X = sequencewise_mean_image_subtraction(test_X, test_vidlens)

    weights, biases = load_dbn(ae_pretrained)

    train_X = normalize_input(train_X, centralize=True)
    val_X = normalize_input(val_X, centralize=True)
    test_X = normalize_input(test_X, centralize=True)

    # IMPT: the encoder was trained with Fortran ordered images, so to visualize
    # convert all the images to C order using reshape_images_order()
    # output = dbn.predict(test_X)
    # test_X = reshape_images_order(test_X, (26, 44))
    # output = reshape_images_order(output, (26, 44))
    # visualize_reconstruction(test_X[:36, :], output[:36, :], shape=(26, 44))

    # Symbolic inputs of the compiled Theano functions.
    window = T.iscalar('theta')
    inputs = T.tensor3('inputs', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.imatrix('targets')

    print('constructing end to end model...')
    network = deltanet_majority_vote.create_model_using_pretrained_encoder(
        weights, biases, (None, None, input_dimension), inputs, (None, None),
        mask, lstm_size, window, output_classes, weight_init_fn, use_peepholes,
        nonlinearity)

    print_network(network)
    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = temporal_softmax_loss(predictions, targets, mask)
    updates = las.updates.adam(cost, all_params, learning_rate=learning_rate)

    # `train` applies the Adam updates; `compute_train_cost` evaluates the
    # same non-deterministic graph without updating any parameter.
    train = theano.function([inputs, targets, mask, window],
                            cost,
                            updates=updates,
                            allow_input_downcast=True)
    compute_train_cost = theano.function([inputs, targets, mask, window],
                                         cost,
                                         allow_input_downcast=True)

    # Deterministic graph used for validation/test cost and predictions.
    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = temporal_softmax_loss(test_predictions, targets, mask)
    compute_test_cost = theano.function([inputs, targets, mask, window],
                                        test_cost,
                                        allow_input_downcast=True)

    val_fn = theano.function([inputs, mask, window],
                             test_predictions,
                             allow_input_downcast=True)

    # Train for at most `no_epochs` epochs of `epochsize` minibatches each.
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    WINDOW_SIZE = 9
    STRIP_SIZE = 3
    val_window = circular_list(validation_window)
    train_strip = np.zeros((STRIP_SIZE, ))
    best_val = float('inf')

    datagen = gen_lstm_batch_random(train_X,
                                    train_y,
                                    train_vidlens,
                                    batchsize=batchsize)
    # Validation and test generators use a batch as large as the whole set,
    # so a single next() yields every sequence.
    val_datagen = gen_lstm_batch_random(val_X,
                                        val_y,
                                        val_vidlens,
                                        batchsize=len(val_vidlens))
    test_datagen = gen_lstm_batch_random(test_X,
                                         test_y,
                                         test_vidlens,
                                         batchsize=len(test_vidlens))

    # We'll use this "validation set" to periodically check progress
    X_val, y_val, mask_val, _ = next(val_datagen)

    # Use this test set to check final classification performance
    X_test, y_test, mask_test, _ = next(test_datagen)

    # reshape the targets for validation: keep the per-sequence labels for
    # evaluate_model2 and repeat them along the time axis for the loss.
    y_val_evaluate = y_val
    y_val = y_val.reshape((-1, 1)).repeat(mask_val.shape[-1], axis=-1)

    for epoch in range(no_epochs):
        time_start = time.time()
        for i in range(epochsize):
            X, y, m, _ = next(datagen)
            # repeat targets based on max sequence len
            y = y.reshape((-1, 1))
            y = y.repeat(m.shape[-1], axis=-1)
            print_str = 'Epoch {} batch {}/{}: {} examples using adam at learning rate = {:.4f}'.format(
                epoch + 1, i + 1, epochsize, len(X), learning_rate)
            print(print_str, end='')
            sys.stdout.flush()
            train(X, y, m, WINDOW_SIZE)
            print('\r', end='')
        # The training cost is measured on the last minibatch of the epoch.
        cost = compute_train_cost(X, y, m, WINDOW_SIZE)
        val_cost = compute_test_cost(X_val, y_val, mask_val, WINDOW_SIZE)
        cost_train.append(cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = cost
        val_window.push(val_cost)

        # gl: relative rise of the current validation cost over its minimum.
        # pk: how far the mean of the recent training costs exceeds their
        # minimum.  pq, printed as "GQ", is their ratio.
        # NOTE(review): train_strip starts as zeros, so until it has been
        # filled its minimum can be 0 and pk is then not finite.
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        pk = 1000 * (np.sum(train_strip) /
                     (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model2(X_val, y_val_evaluate, mask_val,
                                       WINDOW_SIZE, val_fn)
        class_rate.append(cr)

        if val_cost < best_val:
            # New best validation cost: re-measure the test set and snapshot
            # the parameters so they can be restored after training.
            best_val = val_cost
            test_cr, test_conf = evaluate_model2(X_test, y_test, mask_test,
                                                 WINDOW_SIZE, val_fn)
            print(
                "Epoch {} train cost = {}, val cost = {}, "
                "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f}, Test CR= {:.3f} ({:.1f}sec)"
                .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                        test_cr,
                        time.time() - time_start))
            best_params = las.layers.get_all_param_values(network)
        else:
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f} ({:.1f}sec)".
                  format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                         time.time() - time_start))

        # Early stopping is only considered after `validation_window` epochs.
        if epoch >= validation_window and early_stop2(val_window, best_val,
                                                      validation_window):
            break

    numbers = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']

    print('Final Model')
    print('classification rate: {}, validation loss: {}'.format(
        test_cr, best_val))
    print('confusion matrix: ')
    plot_confusion_matrix(test_conf, numbers, fmt='grid')
    plot_validation_cost(cost_train, cost_val, class_rate)

    if 'save_best' in options:
        print('Saving the best model so far...')
        las.layers.set_all_param_values(network, best_params)
        save_model_params(network, options['save_best'])
        print('test reloading...')
        # Reload from the path that was just written; a fixed file name would
        # test a stale or missing file whenever save_best points elsewhere.
        network = load_model_params(network, options['save_best'])
        test_predictions = las.layers.get_output(network, deterministic=True)
        val_fn = theano.function([inputs, mask, window],
                                 test_predictions,
                                 allow_input_downcast=True)
        test_cr, test_conf = evaluate_model(X_test, y_test, mask_test,
                                            WINDOW_SIZE, val_fn)
        print('classification rate: {}, validation loss: {}'.format(
            test_cr, best_val))

コード例 #5

ファイルを表示

ファイル: bimodal_diff_with_val.py プロジェクト: befkaduBelete/ip-avsr

def main():
    """Train and evaluate the bimodal (image + difference image) model.

    Reads the config file named by the ``config`` CLI option, loads image
    and DCT data from .mat files, splits them by subject id into
    train/validation/test sets, builds a two-stream network on top of two
    pretrained encoders and trains it with Adam.  The test classification
    rate is re-measured every time the validation cost reaches a new
    minimum.  If the ``write_results`` option is given, the run settings
    and the per-epoch cost/classification-rate curves are appended to that
    file.

    The DCT features are loaded, mean-removed and split, but are not fed
    to the network in this function.
    """
    configure_theano()
    options = parse_options()
    config_file = options['config']
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    print('CLI options: {}'.format(options.items()))

    print('Reading Config File: {}...'.format(config_file))
    print(config.items('data'))
    print(config.items('models'))
    print(config.items('training'))

    print('preprocessing dataset...')
    data = load_mat_file(config.get('data', 'images'))
    dct_data = load_mat_file(config.get('data', 'dct'))
    ae_pretrained = config.get('models', 'pretrained')
    ae_pretrained_diff = config.get('models', 'pretrained_diff')
    fusiontype = config.get('models', 'fusiontype')
    lstm_size = config.getint('models', 'lstm_size')
    output_classes = config.getint('models', 'output_classes')
    use_peepholes = options['use_peepholes'] if 'use_peepholes' in options \
        else config.getboolean('models', 'use_peepholes')
    use_blstm = config.getboolean('models', 'use_blstm')
    delta_window = config.getint('models', 'delta_window')
    input_dimensions = config.getint('models', 'input_dimensions')

    # capture training parameters (CLI options override the config file)
    validation_window = int(options['validation_window']) \
        if 'validation_window' in options else config.getint('training', 'validation_window')
    num_epoch = int(options['num_epoch']) if 'num_epoch' in options \
        else config.getint('training', 'num_epoch')
    weight_init = options['weight_init'] if 'weight_init' in options \
        else config.get('training', 'weight_init')
    use_finetuning = config.getboolean('training', 'use_finetuning')
    learning_rate = config.getfloat('training', 'learning_rate')
    batchsize = config.getint('training', 'batchsize')
    epochsize = config.getint('training', 'epochsize')

    # Glorot-uniform is the fallback for 'glorot' and for unknown names.
    weight_init_fn = las.init.GlorotUniform()
    if weight_init == 'norm':
        weight_init_fn = las.init.Normal(0.1)
    elif weight_init == 'uniform':
        weight_init_fn = las.init.Uniform()
    elif weight_init == 'ortho':
        weight_init_fn = las.init.Orthogonal()

    # 53 subjects, 70 utterances, 5 view angles
    # s[x]_v[y]_u[z].mp4
    # resized, height, width = (26, 44)
    # ['dataMatrix', 'targetH', 'targetsPerVideoVec', 'videoLengthVec', '__header__', 'targetsVec',
    # '__globals__', 'iterVec', 'filenamesVec', 'dataMatrixCells', 'subjectsVec', 'targetW', '__version__']

    print(data.keys())
    X = data['dataMatrix'].astype('float32')
    y = data['targetsVec'].astype('int32')
    y = y.reshape((len(y), ))
    dct_feats = dct_data['dctFeatures'].astype('float32')
    uniques = np.unique(y)
    print('number of classifications: {}'.format(len(uniques)))
    subjects = data['subjectsVec'].astype('int')
    subjects = subjects.reshape((len(subjects), ))
    video_lens = data['videoLengthVec'].astype('int')
    video_lens = video_lens.reshape((len(video_lens), ))

    # X = reorder_data(X, (26, 44), 'f', 'c')
    # print('performing sequencewise mean image removal...')
    # X = sequencewise_mean_image_subtraction(X, video_lens)
    # visualize_images(X[550:650], (26, 44))
    X_diff = compute_diff_images(X, video_lens)

    # mean remove dct features
    dct_feats = sequencewise_mean_image_subtraction(dct_feats, video_lens)

    train_subject_ids = read_data_split_file('data/train.txt')
    val_subject_ids = read_data_split_file('data/val.txt')
    test_subject_ids = read_data_split_file('data/test.txt')
    print('Train: {}'.format(train_subject_ids))
    print('Validation: {}'.format(val_subject_ids))
    print('Test: {}'.format(test_subject_ids))
    train_X, train_y, train_dct, train_X_diff, train_vidlens, train_subjects, \
    val_X, val_y, val_dct, val_X_diff, val_vidlens, val_subjects, \
    test_X, test_y, test_dct, test_X_diff, test_vidlens, test_subjects = \
        split_data(X, y, dct_feats, X_diff, subjects, video_lens, train_subject_ids, val_subject_ids, test_subject_ids)

    # Sanity checks: the three splits must partition every input array.
    assert train_X.shape[0] + val_X.shape[0] + test_X.shape[0] == len(X)
    assert train_y.shape[0] + val_y.shape[0] + test_y.shape[0] == len(y)
    assert train_vidlens.shape[0] + val_vidlens.shape[0] + test_vidlens.shape[
        0] == len(video_lens)
    # The validation term must count subjects, not video lengths.
    assert train_subjects.shape[0] + val_subjects.shape[
        0] + test_subjects.shape[0] == len(subjects)

    train_X = normalize_input(train_X, centralize=True)
    val_X = normalize_input(val_X, centralize=True)
    test_X = normalize_input(test_X, centralize=True)

    print('loading pretrained encoder: {}...'.format(ae_pretrained))
    ae = load_dbn(ae_pretrained)

    print('loading pretrained encoder: {}...'.format(ae_pretrained_diff))
    ae_diff = load_dbn(ae_pretrained_diff)

    # IMPT: the encoder was trained with Fortran ordered images, so to visualize
    # convert all the images to C order using reshape_images_order()
    # output = dbn.predict(test_X)
    # test_X = reshape_images_order(test_X, (26, 44))
    # output = reshape_images_order(output, (26, 44))
    # visualize_reconstruction(test_X[:36, :], output[:36, :], shape=(26, 44))

    # Symbolic inputs of the compiled Theano functions.
    window = T.iscalar('theta')
    inputs = T.tensor3('inputs', dtype='float32')
    inputs_diff = T.tensor3('inputs_diff', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.imatrix('targets')

    print('constructing end to end model...')
    # Both builders take the same arguments; use_blstm only selects which
    # model module is used.
    if use_blstm:
        network, l_fuse = adenet_v2_2.create_model(
            ae, ae_diff, (None, None, input_dimensions), inputs, (None, None),
            mask, (None, None, input_dimensions), inputs_diff, lstm_size,
            window, output_classes, fusiontype, weight_init_fn, use_peepholes)
    else:
        network, l_fuse = adenet_v2_4.create_model(
            ae, ae_diff, (None, None, input_dimensions), inputs, (None, None),
            mask, (None, None, input_dimensions), inputs_diff, lstm_size,
            window, output_classes, fusiontype, weight_init_fn, use_peepholes)

    print_network(network)
    # draw_to_file(las.layers.get_all_layers(network), 'network.png')
    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = temporal_softmax_loss(predictions, targets, mask)
    updates = adam(cost, all_params, learning_rate=learning_rate)

    # `train` applies the Adam updates; `compute_train_cost` evaluates the
    # same non-deterministic graph without updating any parameter.
    train = theano.function([inputs, targets, mask, inputs_diff, window],
                            cost,
                            updates=updates,
                            allow_input_downcast=True)
    compute_train_cost = theano.function(
        [inputs, targets, mask, inputs_diff, window],
        cost,
        allow_input_downcast=True)

    # Deterministic graph used for validation/test cost and predictions.
    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = temporal_softmax_loss(test_predictions, targets, mask)
    compute_test_cost = theano.function(
        [inputs, targets, mask, inputs_diff, window],
        test_cost,
        allow_input_downcast=True)

    val_fn = theano.function([inputs, mask, inputs_diff, window],
                             test_predictions,
                             allow_input_downcast=True)

    # Train for at most `num_epoch` epochs of `epochsize` minibatches each.
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    STRIP_SIZE = 3
    val_window = circular_list(validation_window)
    train_strip = np.zeros((STRIP_SIZE, ))
    best_val = float('inf')
    best_cr = 0.0

    datagen = gen_lstm_batch_random(train_X,
                                    train_y,
                                    train_vidlens,
                                    batchsize=batchsize)
    integral_lens = compute_integral_len(train_vidlens)

    # Validation and test generators use a batch as large as the whole set,
    # so a single next() yields every sequence.
    val_datagen = gen_lstm_batch_random(val_X,
                                        val_y,
                                        val_vidlens,
                                        batchsize=len(val_vidlens))
    test_datagen = gen_lstm_batch_random(test_X,
                                         test_y,
                                         test_vidlens,
                                         batchsize=len(test_vidlens))

    # We'll use this "validation set" to periodically check progress.
    # The difference images are gathered with the indices the generator
    # returned so that both streams describe the same sequences.
    X_val, y_val, mask_val, idxs_val = next(val_datagen)
    integral_lens_val = compute_integral_len(val_vidlens)
    X_diff_val = gen_seq_batch_from_idx(val_X_diff, idxs_val, val_vidlens,
                                        integral_lens_val, np.max(val_vidlens))

    # we use the test set to check final classification rate
    X_test, y_test, mask_test, idxs_test = next(test_datagen)
    integral_lens_test = compute_integral_len(test_vidlens)
    X_diff_test = gen_seq_batch_from_idx(test_X_diff, idxs_test, test_vidlens,
                                         integral_lens_test,
                                         np.max(test_vidlens))

    # reshape the targets for validation: keep the per-sequence labels for
    # evaluate_model2 and repeat them along the time axis for the loss.
    y_val_evaluate = y_val
    y_val = y_val.reshape((-1, 1)).repeat(mask_val.shape[-1], axis=-1)

    for epoch in range(num_epoch):
        time_start = time.time()
        for i in range(epochsize):
            X, y, m, batch_idxs = next(datagen)
            # repeat targets based on max sequence len
            y = y.reshape((-1, 1))
            y = y.repeat(m.shape[-1], axis=-1)
            X_diff = gen_seq_batch_from_idx(train_X_diff, batch_idxs,
                                            train_vidlens, integral_lens,
                                            np.max(train_vidlens))
            print_str = 'Epoch {} batch {}/{}: {} examples using adam'.format(
                epoch + 1, i + 1, epochsize, len(X))
            print(print_str, end='')
            sys.stdout.flush()
            train(X, y, m, X_diff, delta_window)
            print('\r', end='')
        # The training cost is measured on the last minibatch of the epoch.
        cost = compute_train_cost(X, y, m, X_diff, delta_window)
        val_cost = compute_test_cost(X_val, y_val, mask_val, X_diff_val,
                                     delta_window)
        cost_train.append(cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = cost
        val_window.push(val_cost)

        # gl: relative rise of the current validation cost over its minimum.
        # pk: how far the mean of the recent training costs exceeds their
        # minimum.  pq, printed as "GQ", is their ratio.
        # NOTE(review): train_strip starts as zeros, so until it has been
        # filled its minimum can be 0 and pk is then not finite.
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        pk = 1000 * (np.sum(train_strip) /
                     (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model2(X_val, y_val_evaluate, mask_val,
                                       X_diff_val, delta_window, val_fn)
        class_rate.append(cr)

        if val_cost < best_val:
            # New best validation cost: remember it and re-measure the test
            # set with the current parameters.
            best_val = val_cost
            best_cr = cr
            if fusiontype == 'adasum':
                adascale_param = las.layers.get_all_param_values(
                    l_fuse, scaling_param=True)
            test_cr, test_conf = evaluate_model2(X_test, y_test, mask_test,
                                                 X_diff_test, delta_window,
                                                 val_fn)
            print(
                "Epoch {} train cost = {}, val cost = {}, "
                "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f}, Test CR= {:.3f} ({:.1f}sec)"
                .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                        test_cr,
                        time.time() - time_start))
        else:
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f} ({:.1f}sec)".
                  format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                         time.time() - time_start))

        # Early stopping is only considered after `validation_window` epochs.
        if epoch >= validation_window and early_stop2(val_window, best_val,
                                                      validation_window):
            break

    phrases = ['p1', 'p2', 'p3', 'p4', 'p5', 'p6', 'p7', 'p8', 'p9', 'p10']

    print('Final Model')
    print('CR: {}, val loss: {}, Test CR: {}'.format(best_cr, best_val,
                                                     test_cr))
    if fusiontype == 'adasum':
        print("final scaling params: {}".format(adascale_param))
    print('confusion matrix: ')
    plot_confusion_matrix(test_conf, phrases, fmt='latex')
    plot_validation_cost(cost_train, cost_val, savefilename='valid_cost')

    if 'write_results' in options:
        results_file = options['write_results']
        # Appended record: settings line, then one line each for the
        # training cost, validation cost and classification-rate curves,
        # then a summary line.
        with open(results_file, mode='a') as f:
            f.write('{},{},{},{},{}\n'.format(validation_window, weight_init,
                                              use_peepholes, use_blstm,
                                              use_finetuning))

            s = ','.join([str(v) for v in cost_train])
            f.write('{}\n'.format(s))

            s = ','.join([str(v) for v in cost_val])
            f.write('{}\n'.format(s))

            s = ','.join([str(v) for v in class_rate])
            f.write('{}\n'.format(s))

            f.write('{},{},{}\n'.format(fusiontype, best_cr, best_val))

コード例 #6

ファイルを表示

ファイル: trimodal_with_val.py プロジェクト: konatasick/ip-avsr

def main():
    """Train and evaluate the trimodal (image, DCT, difference image) model.

    Reads the config file named by the ``config`` CLI option, loads the
    pre-split image and DCT data from .mat files, builds a three-stream
    network on top of two pickled encoders and trains it with Adadelta
    for at most 30 epochs, decaying the learning rate from epoch
    ``decay_start`` on.  The test classification rate is re-measured every
    time the validation cost reaches a new minimum.  If the
    ``write_results`` option is set, a summary line is appended to that
    file.
    """
    configure_theano()
    options = parse_options()
    config_file = options['config']
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    print('CLI options: {}'.format(options.items()))

    print('Reading Config File: {}...'.format(config_file))
    print(config.items('data'))
    print(config.items('models'))
    print(config.items('training'))

    print('preprocessing dataset...')
    data = load_mat_file(config.get('data', 'images'))
    dct_data = load_mat_file(config.get('data', 'dct'))
    ae_finetuned = config.get('models', 'finetuned')
    ae_finetuned_diff = config.get('models', 'finetuned_diff')
    fusiontype = config.get('models', 'fusiontype')
    learning_rate = float(config.get('training', 'learning_rate'))
    decay_rate = float(config.get('training', 'decay_rate'))
    decay_start = int(config.get('training', 'decay_start'))
    load_finetune = config.getboolean('training', 'load_finetune')
    load_finetune_diff = config.getboolean('training', 'load_finetune_diff')

    train_vidlens = data['trVideoLengthVec'].astype('int').reshape((-1, ))
    val_vidlens = data['valVideoLengthVec'].astype('int').reshape((-1, ))
    test_vidlens = data['testVideoLengthVec'].astype('int').reshape((-1, ))
    train_X = data['trData'].astype('float32')
    val_X = data['valData'].astype('float32')
    test_X = data['testData'].astype('float32')
    train_dct = dct_data['trDctFeatures'].astype('float32')
    val_dct = dct_data['valDctFeatures'].astype('float32')
    test_dct = dct_data['testDctFeatures'].astype('float32')
    train_X_diff = compute_diff_images(train_X, train_vidlens)
    val_X_diff = compute_diff_images(val_X, val_vidlens)
    test_X_diff = compute_diff_images(test_X, test_vidlens)
    train_y = data['trTargetsVec'].astype('int').reshape(
        (-1, )) + 1  # +1 to handle the -1 introduced in lstm_gendata
    val_y = data['valTargetsVec'].astype('int').reshape((-1, )) + 1
    test_y = data['testTargetsVec'].astype('int').reshape((-1, )) + 1

    # featurewise normalize dct features; validation and test reuse the
    # mean/std computed on the training set.
    train_dct, dct_mean, dct_std = featurewise_normalize_sequence(train_dct)
    val_dct = (val_dct - dct_mean) / dct_std
    test_dct = (test_dct - dct_mean) / dct_std

    # NOTE(review): `ae` / `ae_diff` are only bound when the matching
    # load_finetune flag is true; with a flag off, create_model below raises
    # NameError.  Confirm whether a fallback encoder is intended.
    # SECURITY: pickle.load executes arbitrary code from the file; only
    # point the config at encoder files from a trusted source.
    if load_finetune:
        print('loading finetuned encoder: {}...'.format(ae_finetuned))
        with open(ae_finetuned, 'rb') as f:
            ae = pickle.load(f)
        ae.initialize()

    if load_finetune_diff:
        print('loading finetuned encoder: {}...'.format(ae_finetuned_diff))
        with open(ae_finetuned_diff, 'rb') as f:
            ae_diff = pickle.load(f)
        ae_diff.initialize()

    # IMPT: the encoder was trained with Fortran ordered images, so to visualize
    # convert all the images to C order using reshape_images_order()
    # output = dbn.predict(test_X)
    # test_X = reshape_images_order(test_X, (26, 44))
    # output = reshape_images_order(output, (26, 44))
    # visualize_reconstruction(test_X[:36, :], output[:36, :], shape=(26, 44))

    # Symbolic inputs of the compiled Theano functions.
    window = T.iscalar('theta')
    dct = T.tensor3('dct', dtype='float32')
    inputs = T.tensor3('inputs', dtype='float32')
    inputs_diff = T.tensor3('inputs_diff', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.ivector('targets')
    # The learning rate is a shared variable so it can be decayed in place
    # between epochs without recompiling the training function.
    lr = theano.shared(np.array(learning_rate, dtype=theano.config.floatX),
                       name='learning_rate')
    lr_decay = np.array(decay_rate, dtype=theano.config.floatX)

    print('constructing end to end model...')
    network, l_fuse = adenet_v3.create_model(ae, ae_diff, (None, None, 1500),
                                             inputs, (None, None), mask,
                                             (None, None, 90), dct,
                                             (None, None, 1500), inputs_diff,
                                             250, window, 10, fusiontype)

    print_network(network)
    # draw_to_file(las.layers.get_all_layers(network), 'network.png')
    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = T.mean(las.objectives.categorical_crossentropy(
        predictions, targets))
    updates = adadelta(cost, all_params, learning_rate=lr)
    # updates = adagrad(cost, all_params, learning_rate=lr)

    # `train` applies the Adadelta updates; `compute_train_cost` evaluates
    # the same non-deterministic graph without updating any parameter.
    train = theano.function([inputs, targets, mask, dct, inputs_diff, window],
                            cost,
                            updates=updates,
                            allow_input_downcast=True)
    compute_train_cost = theano.function(
        [inputs, targets, mask, dct, inputs_diff, window],
        cost,
        allow_input_downcast=True)

    # Deterministic graph used for validation/test cost and predictions.
    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = T.mean(
        las.objectives.categorical_crossentropy(test_predictions, targets))
    compute_test_cost = theano.function(
        [inputs, targets, mask, dct, inputs_diff, window],
        test_cost,
        allow_input_downcast=True)

    val_fn = theano.function([inputs, mask, dct, inputs_diff, window],
                             test_predictions,
                             allow_input_downcast=True)

    # Train for at most NUM_EPOCHS epochs of EPOCH_SIZE minibatches each.
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    NUM_EPOCHS = 30
    EPOCH_SIZE = 45
    BATCH_SIZE = 20
    WINDOW_SIZE = 9
    STRIP_SIZE = 3
    VALIDATION_WINDOW = 4
    val_window = circular_list(VALIDATION_WINDOW)
    train_strip = np.zeros((STRIP_SIZE, ))
    best_val = float('inf')
    best_cr = 0.0

    datagen = gen_lstm_batch_random(train_X,
                                    train_y,
                                    train_vidlens,
                                    batchsize=BATCH_SIZE)
    integral_lens = compute_integral_len(train_vidlens)

    # Validation and test generators use a batch as large as the whole set,
    # so a single next() yields every sequence.
    val_datagen = gen_lstm_batch_random(val_X,
                                        val_y,
                                        val_vidlens,
                                        batchsize=len(val_vidlens))
    test_datagen = gen_lstm_batch_random(test_X,
                                         test_y,
                                         test_vidlens,
                                         batchsize=len(test_vidlens))

    # We'll use this "validation set" to periodically check progress.
    # The DCT and difference-image batches are gathered with the indices the
    # generator returned so that all streams describe the same sequences.
    X_val, y_val, mask_val, idxs_val = next(val_datagen)
    integral_lens_val = compute_integral_len(val_vidlens)
    dct_val = gen_seq_batch_from_idx(val_dct, idxs_val, val_vidlens,
                                     integral_lens_val, np.max(val_vidlens))
    X_diff_val = gen_seq_batch_from_idx(val_X_diff, idxs_val, val_vidlens,
                                        integral_lens_val, np.max(val_vidlens))

    # we use the test set to check final classification rate
    X_test, y_test, mask_test, idxs_test = next(test_datagen)
    integral_lens_test = compute_integral_len(test_vidlens)
    dct_test = gen_seq_batch_from_idx(test_dct, idxs_test, test_vidlens,
                                      integral_lens_test, np.max(test_vidlens))
    X_diff_test = gen_seq_batch_from_idx(test_X_diff, idxs_test, test_vidlens,
                                         integral_lens_test,
                                         np.max(test_vidlens))

    def early_stop(cost_window):
        """Return True when every cost in the window exceeds the previous one.

        Windows with fewer than two entries never trigger a stop.  Entries
        are compared in the window's iteration order, presumably oldest to
        newest -- confirm against circular_list.
        """
        if len(cost_window) < 2:
            return False
        else:
            curr = cost_window[0]
            for idx, cost in enumerate(cost_window):
                # The first entry only seeds the comparison.
                if curr < cost or idx == 0:
                    curr = cost
                else:
                    return False
            return True

    for epoch in range(NUM_EPOCHS):
        time_start = time.time()
        for i in range(EPOCH_SIZE):
            X, y, m, batch_idxs = next(datagen)
            d = gen_seq_batch_from_idx(train_dct, batch_idxs, train_vidlens,
                                       integral_lens, np.max(train_vidlens))
            X_diff = gen_seq_batch_from_idx(train_X_diff, batch_idxs,
                                            train_vidlens, integral_lens,
                                            np.max(train_vidlens))
            print_str = 'Epoch {} batch {}/{}: {} examples at learning rate = {:.4f}'.format(
                epoch + 1, i + 1, EPOCH_SIZE, len(X), float(lr.get_value()))
            print(print_str, end='')
            sys.stdout.flush()
            train(X, y, m, d, X_diff, WINDOW_SIZE)
            print('\r', end='')
        # The training cost is measured on the last minibatch of the epoch.
        cost = compute_train_cost(X, y, m, d, X_diff, WINDOW_SIZE)
        val_cost = compute_test_cost(X_val, y_val, mask_val, dct_val,
                                     X_diff_val, WINDOW_SIZE)
        cost_train.append(cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = cost
        val_window.push(val_cost)

        # gl: relative rise of the current validation cost over its minimum.
        # pk: how far the mean of the recent training costs exceeds their
        # minimum.  pq, printed as "GQ", is their ratio.
        # NOTE(review): train_strip starts as zeros, so until it has been
        # filled its minimum can be 0 and pk is then not finite.
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        pk = 1000 * (np.sum(train_strip) /
                     (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model(X_val, y_val, mask_val, dct_val,
                                      X_diff_val, WINDOW_SIZE, val_fn)
        class_rate.append(cr)

        if val_cost < best_val:
            # New best validation cost: remember it and re-measure the test
            # set with the current parameters.
            best_val = val_cost
            best_cr = cr
            if fusiontype == 'adasum':
                adascale_param = las.layers.get_all_param_values(
                    l_fuse, scaling_param=True)
            test_cr, test_conf = evaluate_model(X_test, y_test, mask_test,
                                                dct_test, X_diff_test,
                                                WINDOW_SIZE, val_fn)
            print(
                "Epoch {} train cost = {}, val cost = {}, "
                "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f}, Test CR= {:.3f} ({:.1f}sec)"
                .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                        test_cr,
                        time.time() - time_start))
        else:
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f} ({:.1f}sec)".
                  format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                         time.time() - time_start))

        # Early stopping is only considered after VALIDATION_WINDOW epochs.
        if epoch >= VALIDATION_WINDOW and early_stop(val_window):
            break

        # learning rate decay
        if epoch + 1 >= decay_start:
            lr.set_value(lr.get_value() * lr_decay)

    numbers = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']

    print('Final Model')
    print('CR: {}, val loss: {}, Test CR: {}'.format(best_cr, best_val,
                                                     test_cr))
    if fusiontype == 'adasum':
        print("final scaling params: {}".format(adascale_param))
    print('confusion matrix: ')
    plot_confusion_matrix(test_conf, numbers, fmt='latex')
    plot_validation_cost(cost_train, cost_val, savefilename='valid_cost')

    # .get() keeps the original truthiness test but no longer raises
    # KeyError when the option was not supplied at all.
    results_file = options.get('write_results')
    if results_file:
        with open(results_file, mode='a') as f:
            f.write('{},{},{}\n'.format(fusiontype, test_cr, best_val))

コード例 #7

ファイルを表示

def main():
    """Train and evaluate the delta-net majority-vote LSTM described by a config file.

    Steps, in order: read the CLI options and the config file, load the .mat
    dataset, split it into train/val/test by subject id, normalize each split,
    load the pretrained encoder weights, build the network, compile the Theano
    functions, run the training loop with early stopping, and finally print the
    results and hand them to the plotting helpers.

    Takes no arguments and returns nothing.
    """
    configure_theano()
    options = parse_options()
    config_file = options['config']
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    print('CLI options: {}'.format(options.items()))

    print(config.items('data'))
    print(config.items('models'))
    print(config.items('training'))

    print('preprocessing dataset...')
    data = load_mat_file(config.get('data', 'images'))

    # model hyper-parameters, all taken from the [models] section
    ae_pretrained = config.get('models', 'pretrained')
    lstm_units = int(config.get('models', 'lstm_units'))
    output_classes = int(config.get('models', 'output_classes'))
    weight_init = config.get('models', 'weight_init')
    delta_window = config.getint('models', 'delta_window')
    nonlinearity = select_nonlinearity(config.get('models', 'nonlinearity'))

    # Map the weight_init string to a Lasagne initializer; any value other
    # than 'glorot', 'norm', 'uniform' or 'ortho' silently keeps GlorotUniform.
    weight_init_fn = las.init.GlorotUniform()
    if weight_init == 'glorot':
        weight_init_fn = las.init.GlorotUniform()
    if weight_init == 'norm':
        weight_init_fn = las.init.Normal(0.1)
    if weight_init == 'uniform':
        weight_init_fn = las.init.Uniform()
    if weight_init == 'ortho':
        weight_init_fn = las.init.Orthogonal()

    # training hyper-parameters, all taken from the [training] section
    learning_rate = float(config.get('training', 'learning_rate'))
    no_epochs = config.getint('training', 'no_epochs')
    use_peepholes = config.getboolean('training', 'use_peepholes')
    epochsize = config.getint('training', 'epochsize')
    batchsize = config.getint('training', 'batchsize')
    validation_window = config.getint('training', 'validation_window')

    # 53 subjects, 70 utterances, 5 view angles
    # s[x]_v[y]_u[z].mp4
    # resized, height, width = (26, 44)
    # ['dataMatrix', 'targetH', 'targetsPerVideoVec', 'videoLengthVec', '__header__', 'targetsVec',
    # '__globals__', 'iterVec', 'filenamesVec', 'dataMatrixCells', 'subjectsVec', 'targetW', '__version__']

    print(data.keys())
    X = data['dataMatrix'].astype(
        'float32')  # .reshape((-1, 26, 44), order='f').reshape((-1, 26 * 44))
    # flatten the target / subject / length vectors to 1-D
    y = data['targetsVec'].astype('int32')
    y = y.reshape((len(y), ))
    uniques = np.unique(y)
    print('number of classifications: {}'.format(len(uniques)))
    subjects = data['subjectsVec'].astype('int')
    subjects = subjects.reshape((len(subjects), ))
    video_lens = data['videoLengthVec'].astype('int')
    # NOTE(review): the trailing comma sits inside len(...), so reshape gets a
    # plain int instead of a 1-tuple; the resulting 1-D shape is the same.
    video_lens = video_lens.reshape((len(video_lens, )))

    # the subject ids that make up each split are read from fixed relative paths
    train_subject_ids = read_data_split_file('data/train.txt')
    val_subject_ids = read_data_split_file('data/val.txt')
    test_subject_ids = read_data_split_file('data/test.txt')
    print('Train: {}'.format(train_subject_ids))
    print('Validation: {}'.format(val_subject_ids))
    print('Test: {}'.format(test_subject_ids))
    train_X, train_y, train_vidlens, train_subjects, \
    val_X, val_y, val_vidlens, val_subjects, \
    test_X, test_y, test_vidlens, test_subjects = \
        split_data(X, y, subjects, video_lens, train_subject_ids, val_subject_ids, test_subject_ids)

    # sanity check: the three splits must account for every row of the inputs
    assert train_X.shape[0] + val_X.shape[0] + test_X.shape[0] == len(X)
    assert train_y.shape[0] + val_y.shape[0] + test_y.shape[0] == len(y)
    assert train_vidlens.shape[0] + val_vidlens.shape[0] + test_vidlens.shape[
        0] == len(video_lens)
    assert train_subjects.shape[0] + val_subjects.shape[
        0] + test_subjects.shape[0] == len(subjects)

    # each split is normalized independently of the others
    train_X = normalize_input(train_X, centralize=True)
    val_X = normalize_input(val_X, centralize=True)
    test_X = normalize_input(test_X, centralize=True)

    weights, biases = load_dbn(ae_pretrained)

    # IMPT: the encoder was trained with Fortran-ordered images, so to visualize
    # convert all the images to C order using reshape_images_order()
    # output = dbn.predict(test_X)
    # test_X = reshape_images_order(test_X, (26, 44))
    # output = reshape_images_order(output, (26, 44))
    # visualize_reconstruction(test_X[:36, :], output[:36, :], shape=(26, 44))

    # Symbolic inputs. `window` is an int scalar (named 'theta' in the graph)
    # that receives delta_window every time a compiled function is called.
    window = T.iscalar('theta')
    inputs = T.tensor3('inputs', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.imatrix('targets')

    print('constructing end to end model...')
    network = deltanet_majority_vote.create_model_using_pretrained_encoder(
        weights, biases, (None, None, 1144), inputs, (None, None), mask,
        lstm_units, window, output_classes, weight_init_fn, use_peepholes,
        nonlinearity)

    print_network(network)
    print('compiling model...')
    # training graph (deterministic=False) and its adam updates
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = temporal_softmax_loss(predictions, targets, mask)
    updates = adam(cost, all_params, learning_rate)

    # `train` applies the updates; `compute_train_cost` evaluates the same
    # cost expression without updating the parameters
    train = theano.function([inputs, targets, mask, window],
                            cost,
                            updates=updates,
                            allow_input_downcast=True)
    compute_train_cost = theano.function([inputs, targets, mask, window],
                                         cost,
                                         allow_input_downcast=True)

    # evaluation graph (deterministic=True), used for validation and test
    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = temporal_softmax_loss(test_predictions, targets, mask)
    compute_test_cost = theano.function([inputs, targets, mask, window],
                                        test_cost,
                                        allow_input_downcast=True)

    # returns the raw deterministic predictions; consumed by evaluate_model2
    val_fn = theano.function([inputs, mask, window],
                             test_predictions,
                             allow_input_downcast=True)

    # Train for up to `no_epochs` epochs of `epochsize` minibatches each
    # (both values come from the config file).
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    STRIP_SIZE = 3
    # window of the most recent validation costs, consulted by early_stop2
    val_window = circular_list(validation_window)
    # the last STRIP_SIZE training costs, overwritten cyclically; starts as zeros
    train_strip = np.zeros((STRIP_SIZE, ))
    best_val = float('inf')

    datagen = gen_lstm_batch_random(train_X,
                                    train_y,
                                    train_vidlens,
                                    batchsize=batchsize)
    # val/test generators are asked for a batch as large as the whole split
    val_datagen = gen_lstm_batch_random(val_X,
                                        val_y,
                                        val_vidlens,
                                        batchsize=len(val_vidlens))
    test_datagen = gen_lstm_batch_random(test_X,
                                         test_y,
                                         test_vidlens,
                                         batchsize=len(test_vidlens))

    # We'll use this "validation set" to periodically check progress
    X_val, y_val, mask_val, _ = next(val_datagen)

    # Use this test set to check final classification performance
    X_test, y_test, mask_test, _ = next(test_datagen)

    # Keep the original per-sequence targets for evaluate_model2, and build a
    # copy repeated along the mask's last axis for the cost function, whose
    # `targets` input is declared as an int matrix.
    y_val_evaluate = y_val
    y_val = y_val.reshape((-1, 1)).repeat(mask_val.shape[-1], axis=-1)

    for epoch in range(no_epochs):
        time_start = time.time()
        for i in range(epochsize):
            # NOTE: X and y are rebound here and no longer refer to the full
            # dataset loaded above.
            X, y, m, _ = next(datagen)
            # repeat targets based on max sequence len
            y = y.reshape((-1, 1))
            y = y.repeat(m.shape[-1], axis=-1)
            print_str = 'Epoch {} batch {}/{}: {} examples using adam at learning rate = {:.4f}'.format(
                epoch + 1, i + 1, epochsize, len(X), learning_rate)
            # progress line is rewritten in place: printed without a newline,
            # then the cursor is returned to column 0 after the update
            print(print_str, end='')
            sys.stdout.flush()
            train(X, y, m, delta_window)
            print('\r', end='')
        # The reported training cost is measured on the last minibatch of the
        # epoch only. `cost` is rebound from the symbolic expression to a
        # number; the compiled functions are unaffected.
        cost = compute_train_cost(X, y, m, delta_window)
        val_cost = compute_test_cost(X_val, y_val, mask_val, delta_window)
        cost_train.append(cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = cost
        val_window.push(val_cost)

        # gl: percentage by which the current validation cost exceeds the best
        #     validation cost seen so far.
        # pk: how far the mean of the training strip is above its minimum
        #     (scaled by 1000).
        # pq: ratio of the two, printed as "GQ" below.
        # NOTE(review): until the strip has been filled, np.min(train_strip)
        # is 0 and pk divides by zero - confirm this is acceptable.
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        pk = 1000 * (np.sum(train_strip) /
                     (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model2(X_val, y_val_evaluate, mask_val,
                                       delta_window, val_fn)
        class_rate.append(cr)

        # The test set is evaluated only when the validation cost improves, so
        # test_cr / test_conf always describe the best-validation epoch.
        if val_cost < best_val:
            best_val = val_cost
            test_cr, test_conf = evaluate_model2(X_test, y_test, mask_test,
                                                 delta_window, val_fn)
            print(
                "Epoch {} train cost = {}, val cost = {}, "
                "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f}, Test CR= {:.3f} ({:.1f}sec)"
                .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                        test_cr,
                        time.time() - time_start))
        else:
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f} ({:.1f}sec)".
                  format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                         time.time() - time_start))

        # early stopping is only considered after `validation_window` epochs
        if epoch >= validation_window and early_stop2(val_window, best_val,
                                                      validation_window):
            break

    # class labels handed to the confusion-matrix plot
    phrases = ['p1', 'p2', 'p3', 'p4', 'p5', 'p6', 'p7', 'p8', 'p9', 'p10']

    # NOTE(review): test_cr / test_conf are unbound here if the validation
    # cost never improved (e.g. no_epochs == 0 or a NaN cost) - confirm.
    print('Final Model')
    print('classification rate: {}, validation loss: {}'.format(
        test_cr, best_val))
    print('confusion matrix: ')
    plot_confusion_matrix(test_conf, phrases, fmt='grid')
    plot_validation_cost(cost_train, cost_val, class_rate)

コード例 #8

ファイルを表示

ファイル: dbn.py プロジェクト: konatasick/ip-avsr

def construct_lstm(input_size, lstm_size, output_size, train_data_gen,
                   val_data_gen):
    """Build a single-layer LSTM sequence classifier, train it and report results.

    The network is: input -> masked LSTM -> slice of the last time step ->
    softmax dense layer. It is trained with adadelta on the mean categorical
    cross-entropy for up to NUM_EPOCHS epochs of EPOCH_SIZE minibatches,
    stopping early once the validation cost has risen strictly across the
    whole validation window.

    Args:
        input_size: number of features per time step (last dimension of the
            input layer shape).
        lstm_size: number of units in the LSTM layer.
        output_size: number of units in the softmax output layer.
        train_data_gen: iterator whose items are 4-tuples; the first three
            elements are passed to the training function as
            (inputs, targets, mask), the fourth is ignored.
        val_data_gen: iterator whose first item is a 3-tuple
            (inputs, targets, mask) used as the fixed validation set.

    Returns:
        None. Progress and the final scores are printed, and the confusion
        matrix and cost curves are handed to the plotting helpers.
    """
    # All gates have initializers for the input-to-gate and hidden state-to-gate
    # weight matrices, the cell-to-gate weight vector, the bias vector, and the nonlinearity.
    # The convention is that gates use the standard sigmoid nonlinearity,
    # which is the default for the Gate class.
    gate_parameters = Gate(W_in=las.init.Orthogonal(),
                           W_hid=las.init.Orthogonal(),
                           b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=las.init.Orthogonal(),
        W_hid=las.init.Orthogonal(),
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None,
        b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    # Prepare the input layers.
    # By setting the first and second dimensions to None, we allow
    # arbitrary minibatch sizes with arbitrary sequence lengths.
    # The number of feature dimensions is `input_size`.
    l_in = InputLayer(shape=(None, None, input_size), name='input')
    # This input will be used to provide the network with masks.
    # Masks are expected to be matrices of shape (n_batch, n_time_steps);
    # both of these dimensions are variable for us so we will use
    # an input shape of (None, None)
    l_mask = InputLayer(shape=(None, None), name='mask')

    # The LSTM has `lstm_size` hidden/cell units.
    l_lstm = LSTMLayer(
        l_in,
        lstm_size,
        # We need to specify a separate input for masks
        mask_input=l_mask,
        # Here, we supply the gate parameters for each gate
        ingate=gate_parameters,
        forgetgate=gate_parameters,
        cell=cell_parameters,
        outgate=gate_parameters,
        # We'll learn the initialization and use gradient clipping
        learn_init=True,
        grad_clipping=5.,
        name='lstm1')

    # The LSTM layer outputs one vector per time step. Since we are only
    # interested in the final prediction, slice out the last time step
    # (index -1 along axis 1) and feed it to the next layer.
    l_forward_slice = SliceLayer(l_lstm, -1, 1, name='slice')

    # Now, we can apply feed-forward layers as usual.
    # We want the network to predict a classification for the sequence,
    # so the output layer has one unit per class.
    l_out = DenseLayer(l_forward_slice,
                       num_units=output_size,
                       nonlinearity=las.nonlinearities.softmax,
                       name='output')

    print_network(l_out)

    # Symbolic variable for the target network output.
    # It is a vector because there is only 1 target value per sequence.
    target_values = T.ivector('target_output')

    # lasagne.layers.get_output produces an expression for the output of the net
    prediction = las.layers.get_output(l_out)

    # Our cost will be categorical cross entropy error
    cost = T.mean(
        las.objectives.categorical_crossentropy(prediction, target_values))
    # Retrieve all trainable parameters from the network
    all_params = las.layers.get_all_params(l_out, trainable=True)
    # Compute adadelta updates for training
    updates = adadelta(cost, all_params)
    # Theano functions for training and computing cost; the mask is supplied
    # through the mask input layer's own variable.
    train = theano.function([l_in.input_var, target_values, l_mask.input_var],
                            cost,
                            updates=updates,
                            allow_input_downcast=True)
    compute_train_cost = theano.function(
        [l_in.input_var, target_values, l_mask.input_var],
        cost,
        allow_input_downcast=True)

    # Deterministic graph used for validation cost and predictions.
    test_prediction = las.layers.get_output(l_out, deterministic=True)
    test_cost = T.mean(
        las.objectives.categorical_crossentropy(test_prediction,
                                                target_values))
    compute_val_cost = theano.function(
        [l_in.input_var, target_values, l_mask.input_var],
        test_cost,
        allow_input_downcast=True)
    val_fn = theano.function([l_in.input_var, l_mask.input_var],
                             test_prediction,
                             allow_input_downcast=True)

    # We'll use this "validation set" to periodically check progress
    X_val, y_val, mask_val = next(val_data_gen)

    # Train for up to NUM_EPOCHS epochs of EPOCH_SIZE minibatches each.
    cost_train = []
    cost_val = []
    class_rate = []
    best_val = float('inf')
    best_conf = None
    best_cr = 0.0
    NUM_EPOCHS = 30
    EPOCH_SIZE = 26
    STRIP_SIZE = 3
    VALIDATION_WINDOW = 4
    # window of the most recent validation costs, consulted by early_stop
    val_window = circular_list(VALIDATION_WINDOW)
    # the last STRIP_SIZE training costs, overwritten cyclically; starts as zeros
    train_strip = np.zeros((STRIP_SIZE, ))

    def early_stop(cost_window):
        """Return True when every cost in the window is strictly greater than the one before it."""
        if len(cost_window) < 2:
            return False
        else:
            curr = cost_window[0]
            for idx, cost in enumerate(cost_window):
                # the first item only seeds `curr`; every later item must
                # exceed its predecessor or the window is not "all rising"
                if curr < cost or idx == 0:
                    curr = cost
                else:
                    return False
            return True

    for epoch in range(NUM_EPOCHS):
        time_start = time.time()
        for _batch in range(EPOCH_SIZE):
            X, y, m, _ = next(train_data_gen)
            train(X, y, m)
        # The reported training cost is measured on the last minibatch only.
        train_cost = compute_train_cost(X, y, m)
        val_cost = compute_val_cost(X_val, y_val, mask_val)
        cr, conf = evaluate_model(X_val, y_val, mask_val, val_fn)
        cost_train.append(train_cost)
        cost_val.append(val_cost)
        class_rate.append(cr)
        train_strip[epoch % STRIP_SIZE] = train_cost
        val_window.push(val_cost)

        # gl: percentage by which the current validation cost exceeds the best
        #     validation cost seen so far.
        # pk: how far the mean of the training strip is above its minimum
        #     (scaled by 1000).
        # pq: ratio of the two, printed as "GQ" below.
        # NOTE(review): until the strip has been filled, np.min(train_strip)
        # is 0 and pk divides by zero - confirm this is acceptable.
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        pk = 1000 * (np.sum(train_strip) /
                     (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        print(
            "Epoch {} train cost = {}, validation cost = {}, "
            "generalization loss = {:.3f}, GQ = {:.3f}, classification rate = {:.3f} ({:.1f}sec)"
            .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                    time.time() - time_start))

        # remember the scores of the epoch with the lowest validation cost
        if val_cost < best_val:
            best_val = val_cost
            best_cr = cr
            best_conf = conf

        # early stopping is only considered after VALIDATION_WINDOW epochs
        if epoch >= VALIDATION_WINDOW and early_stop(val_window):
            break

    # class labels handed to the confusion-matrix plot
    letters = [
        'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
        'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'
    ]

    print('Final Model')
    print('classification rate: {}'.format(best_cr))
    print('validation loss: {}'.format(best_val))
    print('confusion matrix: ')
    plot_confusion_matrix(best_conf, letters, fmt='grid')
    plot_validation_cost(cost_train, cost_val, class_rate)

コード例 #9

ファイルを表示

def main():
    """Train and evaluate the baseline end-to-end LSTM on a fine-tuned encoder.

    Steps, in order: read the CLI options and the config file, load the .mat
    dataset (already split into train/val/test), unpickle the fine-tuned
    encoder, normalize each split, build the network, compile the Theano
    functions, run the training loop with early stopping and learning-rate
    decay, then print the results, hand them to the plotting helpers and
    optionally save the best parameters.

    Takes no arguments and returns nothing.

    Raises:
        ValueError: if the config sets ``load_finetune`` to false. The model
            is built on top of the unpickled encoder, so there is nothing to
            train without it.
    """
    configure_theano()
    options = parse_options()
    config_file = options['config']
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    print('CLI options: {}'.format(options.items()))

    print('Reading Config File: {}...'.format(config_file))
    print(config.items('data'))
    print(config.items('models'))
    print(config.items('training'))

    print('preprocessing dataset...')
    data = load_mat_file(config.get('data', 'images'))
    ae_finetuned = config.get('models', 'finetuned')
    learning_rate = float(config.get('training', 'learning_rate'))
    decay_rate = float(config.get('training', 'decay_rate'))
    decay_start = int(config.get('training', 'decay_start'))
    load_finetune = config.getboolean('training', 'load_finetune')
    lstm_units = config.getint('training', 'lstm_units')
    output_units = config.getint('training', 'output_units')

    # The .mat file already contains the three splits; flatten the length and
    # target vectors to 1-D.
    train_vidlens = data['trVideoLengthVec'].astype('int').reshape((-1, ))
    val_vidlens = data['valVideoLengthVec'].astype('int').reshape((-1, ))
    test_vidlens = data['testVideoLengthVec'].astype('int').reshape((-1, ))
    train_X = data['trData'].astype('float32')
    val_X = data['valData'].astype('float32')
    test_X = data['testData'].astype('float32')
    train_y = data['trTargetsVec'].astype('int').reshape(
        (-1, )) + 1  # +1 to handle the -1 introduced in lstm_gendata
    val_y = data['valTargetsVec'].astype('int').reshape((-1, )) + 1
    test_y = data['testTargetsVec'].astype('int').reshape((-1, )) + 1

    # The encoder is mandatory: create_model() below is built on top of it.
    # Fail here with a clear message instead of an UnboundLocalError later.
    if not load_finetune:
        raise ValueError(
            "'load_finetune' must be enabled in the [training] section: "
            "the end-to-end model is built on the fine-tuned encoder")

    print('loading finetuned encoder: {}...'.format(ae_finetuned))
    # NOTE(security): pickle.load can execute arbitrary code; only point the
    # 'finetuned' setting at files from a trusted source.
    # The file is opened with `with` so the handle is closed after loading,
    # and the encoder is loaded once rather than twice.
    with open(ae_finetuned, 'rb') as encoder_file:
        dbn = pickle.load(encoder_file)
    dbn.initialize()

    # each split is normalized independently of the others
    train_X = normalize_input(train_X, centralize=True)
    val_X = normalize_input(val_X, centralize=True)
    test_X = normalize_input(test_X, centralize=True)

    # IMPT: the encoder was trained with Fortran-ordered images, so to visualize
    # convert all the images to C order using reshape_images_order()

    inputs = T.tensor3('inputs', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.ivector('targets')
    # The learning rate is a shared variable so that it can be decayed between
    # epochs without recompiling the training function.
    lr = theano.shared(np.array(learning_rate, dtype=theano.config.floatX),
                       name='learning_rate')
    lr_decay = np.array(decay_rate, dtype=theano.config.floatX)

    print('constructing end to end model...')
    network = baseline_end2end.create_model(dbn, (None, None, 1500), inputs,
                                            (None, None), mask, lstm_units,
                                            output_units)

    print_network(network)
    print('compiling model...')
    # training graph (deterministic=False) and its adadelta updates
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = T.mean(las.objectives.categorical_crossentropy(
        predictions, targets))
    updates = las.updates.adadelta(cost, all_params, learning_rate=lr)

    # `train` applies the updates; `compute_train_cost` evaluates the same
    # cost expression without updating the parameters
    train = theano.function([inputs, targets, mask],
                            cost,
                            updates=updates,
                            allow_input_downcast=True)
    compute_train_cost = theano.function([inputs, targets, mask],
                                         cost,
                                         allow_input_downcast=True)

    # evaluation graph (deterministic=True), used for validation and test
    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = T.mean(
        las.objectives.categorical_crossentropy(test_predictions, targets))
    compute_test_cost = theano.function([inputs, targets, mask],
                                        test_cost,
                                        allow_input_downcast=True)

    val_fn = theano.function([inputs, mask],
                             test_predictions,
                             allow_input_downcast=True)

    # Train for up to NUM_EPOCHS epochs of EPOCH_SIZE minibatches each.
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    NUM_EPOCHS = 30
    EPOCH_SIZE = 45
    BATCH_SIZE = 20
    STRIP_SIZE = 3
    VALIDATION_WINDOW = 4
    # window of the most recent validation costs, consulted by early_stop
    val_window = circular_list(VALIDATION_WINDOW)
    # the last STRIP_SIZE training costs, overwritten cyclically; starts as zeros
    train_strip = np.zeros((STRIP_SIZE, ))
    best_val = float('inf')
    # Results of the best-validation epoch. They stay None if the validation
    # cost never improves (e.g. it is NaN from the first epoch on).
    test_cr = None
    test_conf = None
    best_params = None

    datagen = gen_lstm_batch_random(train_X,
                                    train_y,
                                    train_vidlens,
                                    batchsize=BATCH_SIZE)
    # val/test generators are asked for a batch as large as the whole split
    val_datagen = gen_lstm_batch_random(val_X,
                                        val_y,
                                        val_vidlens,
                                        batchsize=len(val_vidlens))
    test_datagen = gen_lstm_batch_random(test_X,
                                         test_y,
                                         test_vidlens,
                                         batchsize=len(test_vidlens))

    # We'll use this "validation set" to periodically check progress
    X_val, y_val, mask_val, _ = next(val_datagen)

    # Use this test set to check final classification performance
    X_test, y_test, mask_test, _ = next(test_datagen)

    def early_stop(cost_window):
        """Return True when every cost in the window is strictly greater than the one before it."""
        if len(cost_window) < 2:
            return False
        else:
            curr = cost_window[0]
            for idx, cost in enumerate(cost_window):
                # the first item only seeds `curr`; every later item must
                # exceed its predecessor or the window is not "all rising"
                if curr < cost or idx == 0:
                    curr = cost
                else:
                    return False
            return True

    for epoch in range(NUM_EPOCHS):
        time_start = time.time()
        for i in range(EPOCH_SIZE):
            X, y, m, _ = next(datagen)
            print_str = 'Epoch {} batch {}/{}: {} examples at learning rate = {:.4f}'.format(
                epoch + 1, i + 1, EPOCH_SIZE, len(X), float(lr.get_value()))
            # progress line is rewritten in place: printed without a newline,
            # then the cursor is returned to column 0 after the update
            print(print_str, end='')
            sys.stdout.flush()
            train(X, y, m)
            print('\r', end='')
        # The reported training cost is measured on the last minibatch of the
        # epoch only. `cost` is rebound from the symbolic expression to a
        # number; the compiled functions are unaffected.
        cost = compute_train_cost(X, y, m)
        val_cost = compute_test_cost(X_val, y_val, mask_val)
        cost_train.append(cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = cost
        val_window.push(val_cost)

        # gl: percentage by which the current validation cost exceeds the best
        #     validation cost seen so far.
        # pk: how far the mean of the training strip is above its minimum
        #     (scaled by 1000).
        # pq: ratio of the two, printed as "GQ" below.
        # NOTE(review): until the strip has been filled, np.min(train_strip)
        # is 0 and pk divides by zero - confirm this is acceptable.
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        pk = 1000 * (np.sum(train_strip) /
                     (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model(X_val, y_val, mask_val, val_fn)
        class_rate.append(cr)

        # The test set is evaluated and the parameters are snapshotted only
        # when the validation cost improves.
        if val_cost < best_val:
            best_val = val_cost
            test_cr, test_conf = evaluate_model(X_test, y_test, mask_test,
                                                val_fn)
            print(
                "Epoch {} train cost = {}, val cost = {}, "
                "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f}, Test CR= {:.3f} ({:.1f}sec)"
                .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                        test_cr,
                        time.time() - time_start))
            best_params = las.layers.get_all_param_values(network)
        else:
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f} ({:.1f}sec)".
                  format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                         time.time() - time_start))

        # early stopping is only considered after VALIDATION_WINDOW epochs
        if epoch >= VALIDATION_WINDOW and early_stop(val_window):
            break

        # learning rate decay
        if epoch + 1 >= decay_start:
            lr.set_value(lr.get_value() * lr_decay)

    # class labels handed to the confusion-matrix plot
    numbers = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']

    print('Final Model')
    print('classification rate: {}, validation loss: {}'.format(
        test_cr, best_val))
    # There is no confusion matrix to plot if validation never improved.
    if test_conf is not None:
        print('confusion matrix: ')
        plot_confusion_matrix(test_conf, numbers, fmt='grid')
    plot_validation_cost(cost_train, cost_val, class_rate)

    # Restore and save the best snapshot, if one was requested and exists.
    if 'save_best' in options and best_params is not None:
        print('Saving the best model so far...')
        las.layers.set_all_param_values(network, best_params)
        save_model_params(network, options['save_best'])

コード例 #10

ファイルを表示

ファイル: audio_visual_runner.py プロジェクト: konatasick/ip-avsr

def main():
    """Train the two-stream (visual + audio) network and report test results.

    Settings are read from the config file named by the ``config`` CLI option;
    ``nonlinearity``, ``validation_window``, ``num_epoch``, ``weight_init``,
    ``learning_rate`` and ``use_peepholes`` may be overridden on the command
    line.  The function

    1. loads the image and audio ``.mat`` files and the pretrained weights of
       both encoders (``load_dbn``),
    2. builds one pretrained substream per modality and fuses them with
       ``avnet.create_model``,
    3. trains with Adam for at most ``num_epoch`` epochs of ``EPOCH_SIZE``
       minibatches, scoring the validation set after every epoch and the test
       set whenever the validation cost reaches a new minimum,
    4. leaves the loop early when ``early_stop2`` returns true (only consulted
       once ``validation_window`` epochs have run),
    5. prints and plots the final figures and, when the ``write_results``
       option is present, appends them to that file.
    """
    configure_theano()
    options = parse_options()
    config_file = options['config']
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    print('CLI options: {}'.format(options.items()))

    print('Reading Config File: {}...'.format(config_file))
    print(config.items('data'))
    print(config.items('models'))
    print(config.items('training'))

    print('preprocessing dataset...')
    data = load_mat_file(config.get('data', 'images'))
    data_audio = load_mat_file(config.get('data', 'audio'))
    ae_pretrained = config.get('models', 'pretrained')
    ae_diff_pretrained = config.get('models', 'pretrained_diff')
    fusiontype = config.get('models', 'fusiontype')
    lstm_size = config.getint('models', 'lstm_size')
    output_classes = config.getint('models', 'output_classes')
    nonlinearity = options['nonlinearity'] if 'nonlinearity' in options \
        else config.get('models', 'nonlinearity')

    # replace the activation name by the callable of the same name
    if nonlinearity == 'sigmoid':
        nonlinearity = sigmoid
    elif nonlinearity == 'rectify':
        nonlinearity = rectify

    # capture training parameters (CLI values take precedence over the config)
    validation_window = int(options['validation_window']) \
        if 'validation_window' in options else config.getint('training', 'validation_window')
    num_epoch = int(options['num_epoch']) \
        if 'num_epoch' in options else config.getint('training', 'num_epoch')
    weight_init = options['weight_init'] if 'weight_init' in options \
        else config.get('training', 'weight_init')
    # Coerce the CLI override like the other numeric options: the value is
    # later formatted with '{:.4f}', which rejects a non-numeric type.
    learning_rate = float(options['learning_rate']) if 'learning_rate' in options \
        else config.getfloat('training', 'learning_rate')
    # NOTE(review): the CLI value is used as-is; if parse_options yields
    # strings, something like 'False' would be truthy here -- confirm.
    use_peepholes = options['use_peepholes'] if 'use_peepholes' in options \
        else config.getboolean('training', 'use_peepholes')
    input_dimension = config.getint('models', 'input_dimension')
    input_dimension2 = config.getint('models', 'input_dimension2')

    # only recorded in the results file below
    use_blstm = config.getboolean('training', 'use_blstm')
    use_finetuning = config.getboolean('training', 'use_finetuning')

    # Glorot uniform is the fallback for unrecognised weight_init names
    weight_init_fn = las.init.GlorotUniform()
    if weight_init == 'glorot':
        weight_init_fn = las.init.GlorotUniform()
    elif weight_init == 'norm':
        weight_init_fn = las.init.Normal(0.1)
    elif weight_init == 'uniform':
        weight_init_fn = las.init.Uniform()
    elif weight_init == 'ortho':
        weight_init_fn = las.init.Orthogonal()

    train_vidlens = data['trVideoLengthVec'].astype('int').reshape((-1, ))
    val_vidlens = data['valVideoLengthVec'].astype('int').reshape((-1, ))
    test_vidlens = data['testVideoLengthVec'].astype('int').reshape((-1, ))
    train_X = data['trData'].astype('float32')
    val_X = data['valData'].astype('float32')
    test_X = data['testData'].astype('float32')
    train_X_audio = data_audio['trData'].astype('float32')
    val_X_audio = data_audio['valData'].astype('float32')
    test_X_audio = data_audio['testData'].astype('float32')
    # +1 to handle the -1 introduced in lstm_gendata
    train_y = data['trTargetsVec'].astype('int').reshape((-1, )) + 1
    val_y = data['valTargetsVec'].astype('int').reshape((-1, )) + 1
    test_y = data['testTargetsVec'].astype('int').reshape((-1, )) + 1

    train_X = reorder_data(train_X, (30, 50))
    val_X = reorder_data(val_X, (30, 50))
    test_X = reorder_data(test_X, (30, 50))

    visual_weights, visual_biases = load_dbn(ae_pretrained)
    audio_weights, audio_biases = load_dbn(ae_diff_pretrained)

    # IMPT: the encoder was trained with Fortran ordered images, so to visualize
    # convert all the images to C order using reshape_images_order()
    # output = dbn.predict(test_X)
    # test_X = reshape_images_order(test_X, (26, 44))
    # output = reshape_images_order(output, (26, 44))
    # visualize_reconstruction(test_X[:36, :], output[:36, :], shape=(26, 44))

    # symbolic inputs shared by every compiled function below
    window = T.iscalar('theta')
    visual_input = T.tensor3('visual_input', dtype='float32')
    audio_input = T.tensor3('audio_input', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.imatrix('targets')

    print('constructing end to end model...')
    visual_net = avnet.create_pretrained_substream(
        visual_weights, visual_biases, (None, None, input_dimension),
        visual_input, (None, None), mask, 'visual', lstm_size, window,
        nonlinearity, weight_init_fn, use_peepholes)

    audio_net = avnet.create_pretrained_substream(
        audio_weights, audio_biases, (None, None, input_dimension2),
        audio_input, (None, None), mask, 'audio', lstm_size, window,
        nonlinearity, weight_init_fn, use_peepholes)
    network, l_fuse = avnet.create_model([visual_net, audio_net], (None, None),
                                         mask, lstm_size, output_classes,
                                         fusiontype, weight_init_fn,
                                         use_peepholes)

    print_network(network)
    # draw_to_file(las.layers.get_all_layers(network), 'network.png')
    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = temporal_softmax_loss(predictions, targets, mask)
    updates = adam(cost, all_params, learning_rate=learning_rate)

    # same graph twice: `train` applies the Adam updates, the other only
    # reports the (non-deterministic) training cost
    train = theano.function([visual_input, targets, mask, audio_input, window],
                            cost,
                            updates=updates,
                            allow_input_downcast=True)
    compute_train_cost = theano.function(
        [visual_input, targets, mask, audio_input, window],
        cost,
        allow_input_downcast=True)

    # deterministic forward pass for validation/test cost and predictions
    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = temporal_softmax_loss(test_predictions, targets, mask)
    compute_test_cost = theano.function(
        [visual_input, targets, mask, audio_input, window],
        test_cost,
        allow_input_downcast=True)

    val_fn = theano.function([visual_input, mask, audio_input, window],
                             test_predictions,
                             allow_input_downcast=True)

    # Train for at most num_epoch epochs of EPOCH_SIZE minibatches each
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    EPOCH_SIZE = 90
    BATCH_SIZE = 10
    WINDOW_SIZE = 9
    STRIP_SIZE = 3
    val_window = circular_list(validation_window)
    train_strip = np.zeros((STRIP_SIZE, ))
    best_val = float('inf')
    best_tr = float('inf')
    best_cr = 0.0
    # Only assigned when the validation cost improves; pre-set them so the
    # final report cannot hit an unbound name if that never happens
    # (e.g. a NaN cost or num_epoch == 0).  nan/None mean "never evaluated".
    test_cr = float('nan')
    test_conf = None
    adascale_param = None

    datagen = gen_lstm_batch_random(train_X,
                                    train_y,
                                    train_vidlens,
                                    batchsize=BATCH_SIZE)
    integral_lens = compute_integral_len(train_vidlens)

    # validation and test sets are each drawn as one single batch
    val_datagen = gen_lstm_batch_random(val_X,
                                        val_y,
                                        val_vidlens,
                                        batchsize=len(val_vidlens))
    test_datagen = gen_lstm_batch_random(test_X,
                                         test_y,
                                         test_vidlens,
                                         batchsize=len(test_vidlens))

    # We'll use this "validation set" to periodically check progress
    X_val, y_val, mask_val, idxs_val = next(val_datagen)
    integral_lens_val = compute_integral_len(val_vidlens)
    X_diff_val = gen_seq_batch_from_idx(val_X_audio, idxs_val, val_vidlens,
                                        integral_lens_val, np.max(val_vidlens))

    # we use the test set to check final classification rate
    X_test, y_test, mask_test, idxs_test = next(test_datagen)
    integral_lens_test = compute_integral_len(test_vidlens)
    X_diff_test = gen_seq_batch_from_idx(test_X_audio, idxs_test, test_vidlens,
                                         integral_lens_test,
                                         np.max(test_vidlens))

    # reshape the targets for validation: the cost function gets one target
    # per time step, evaluate_model2 keeps the per-sequence targets
    y_val_evaluate = y_val
    y_val = y_val.reshape((-1, 1)).repeat(mask_val.shape[-1], axis=-1)

    for epoch in range(num_epoch):
        time_start = time.time()
        for i in range(EPOCH_SIZE):
            X, y, m, batch_idxs = next(datagen)
            # repeat targets based on max sequence len
            y = y.reshape((-1, 1))
            y = y.repeat(m.shape[-1], axis=-1)
            X_diff = gen_seq_batch_from_idx(train_X_audio, batch_idxs,
                                            train_vidlens, integral_lens,
                                            np.max(train_vidlens))
            print_str = 'Epoch {} batch {}/{}: {} examples using adam with learning rate {:.4f}'.format(
                epoch + 1, i + 1, EPOCH_SIZE, len(X), learning_rate)
            print(print_str, end='')
            sys.stdout.flush()
            train(X, y, m, X_diff, WINDOW_SIZE)
            # carriage return so the next progress line overwrites this one
            print('\r', end='')
        # the training cost is measured on the last minibatch of the epoch
        cost = compute_train_cost(X, y, m, X_diff, WINDOW_SIZE)
        val_cost = compute_test_cost(X_val, y_val, mask_val, X_diff_val,
                                     WINDOW_SIZE)
        cost_train.append(cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = cost
        val_window.push(val_cost)

        # gl: validation cost relative to the best validation cost so far
        # pk: mean over min of the last STRIP_SIZE training costs
        # NOTE: train_strip starts as zeros, so until STRIP_SIZE epochs have
        # run its minimum is the initial 0 and pk/pq are not meaningful.
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        pk = 1000 * (np.sum(train_strip) /
                     (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model2(X_val, y_val_evaluate, mask_val,
                                       X_diff_val, WINDOW_SIZE, val_fn)
        class_rate.append(cr)

        if val_cost < best_val:
            # new best validation cost: remember it and score the test set
            best_val = val_cost
            best_tr = cost
            best_cr = cr
            if fusiontype == 'adasum':
                adascale_param = las.layers.get_all_param_values(
                    l_fuse, scaling_param=True)
            test_cr, test_conf = evaluate_model2(X_test, y_test, mask_test,
                                                 X_diff_test, WINDOW_SIZE,
                                                 val_fn)
            print(
                "Epoch {} train cost = {}, val cost = {}, "
                "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f}, Test CR= {:.3f} ({:.1f}sec)"
                .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                        test_cr,
                        time.time() - time_start))
        else:
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f} ({:.1f}sec)".
                  format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                         time.time() - time_start))

        if epoch >= validation_window and early_stop2(val_window, best_val,
                                                      validation_window):
            break

    numbers = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']

    print('Final Model')
    print('CR: {}, val loss: {}, Test CR: {}'.format(best_cr, best_val,
                                                     test_cr))
    if fusiontype == 'adasum':
        print("final scaling params: {}".format(adascale_param))
    print('confusion matrix: ')
    # there is no confusion matrix if the test set was never evaluated
    if test_conf is not None:
        plot_confusion_matrix(test_conf, numbers, fmt='latex')
    plot_validation_cost(cost_train, cost_val, savefilename='valid_cost')

    if 'write_results' in options:
        results_file = options['write_results']
        # append: one summary line followed by the three per-epoch curves
        with open(results_file, mode='a') as f:
            f.write('{},{},{},{},{},{},{},{},{},{},{},{}\n'.format(
                use_finetuning, 'yes', use_peepholes, 'adam', weight_init,
                'RELU', use_blstm, learning_rate, best_tr, best_val,
                best_cr * 100, test_cr * 100))

            s = ','.join([str(v) for v in cost_train])
            f.write('{}\n'.format(s))

            s = ','.join([str(v) for v in cost_val])
            f.write('{}\n'.format(s))

            s = ','.join([str(v) for v in class_rate])
            f.write('{}\n'.format(s))

コード例 #11

ファイルを表示

ファイル: unimodal_dct_with_val.py プロジェクト: konatasick/ip-avsr

def main():
    """Train the DCT-feature LSTM baseline with a validation split and report test results.

    Settings are read from the config file named by the ``config`` CLI option;
    ``dct_data``, ``no_coeff``, ``no_epochs``, ``validation_window``,
    ``epochsize`` and ``batchsize`` may be overridden on the command line.
    The function

    1. loads the image and DCT ``.mat`` files and splits them into
       train/validation/test by the subject ids listed in ``data/train.txt``,
       ``data/val.txt`` and ``data/test.txt``,
    2. normalises the DCT features with the statistics returned for the
       training split,
    3. builds ``lstm_classifier_baseline`` on the DCT features and trains it
       with Adam for at most ``no_epochs`` epochs of ``epochsize`` minibatches,
       scoring the validation set every epoch and the test set whenever the
       validation cost reaches a new minimum,
    4. leaves the loop early when ``early_stop2`` returns true (only consulted
       once ``validation_window`` epochs have run),
    5. prints and plots the final figures and, when the ``write_results``
       option is present, appends ``test_cr,best_val`` to that file.
    """
    configure_theano()
    options = parse_options()
    config_file = options['config']
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    print('CLI options: {}'.format(options.items()))

    print(config.items('data'))
    print(config.items('models'))
    print(config.items('training'))

    print('preprocessing dataset...')
    data = load_mat_file(config.get('data', 'images'))
    dct_data = load_mat_file(options['dct_data'] if 'dct_data' in options else config.get('data', 'dct'))
    # Integer settings: coerce CLI overrides so they match the type returned
    # by config.getint(); they feed range(), arithmetic and circular_list().
    no_coeff = int(options['no_coeff']) if 'no_coeff' in options \
        else config.getint('models', 'no_coeff')
    no_epochs = int(options['no_epochs']) if 'no_epochs' in options \
        else config.getint('training', 'no_epochs')
    validation_window = int(options['validation_window']) if 'validation_window' in options \
        else config.getint('training', 'validation_window')
    epochsize = int(options['epochsize']) if 'epochsize' in options \
        else config.getint('training', 'epochsize')
    batchsize = int(options['batchsize']) if 'batchsize' in options \
        else config.getint('training', 'batchsize')

    # 53 subjects, 70 utterances, 5 view angles
    # s[x]_v[y]_u[z].mp4
    # resized, height, width = (26, 44)
    # ['dataMatrix', 'targetH', 'targetsPerVideoVec', 'videoLengthVec', '__header__', 'targetsVec',
    # '__globals__', 'iterVec', 'filenamesVec', 'dataMatrixCells', 'subjectsVec', 'targetW', '__version__']

    print(data.keys())
    X = data['dataMatrix'].astype('float32')
    y = data['targetsVec'].astype('int32')
    y = y.reshape((len(y),))
    dct_feats = dct_data['dctFeatures'].astype('float32')
    uniques = np.unique(y)
    print('number of classifications: {}'.format(len(uniques)))
    subjects = data['subjectsVec'].astype('int')
    subjects = subjects.reshape((len(subjects),))
    video_lens = data['videoLengthVec'].astype('int')
    video_lens = video_lens.reshape((len(video_lens),))

    # X = reorder_data(X, (26, 44), 'f', 'c')
    # print('performing sequencewise mean image removal...')
    # X = sequencewise_mean_image_subtraction(X, video_lens)
    # visualize_images(X[550:650], (26, 44))

    # mean remove dct features
    # dct_feats = sequencewise_mean_image_subtraction(dct_feats, video_lens)

    train_subject_ids = read_data_split_file('data/train.txt')
    val_subject_ids = read_data_split_file('data/val.txt')
    test_subject_ids = read_data_split_file('data/test.txt')
    print('Train: {}'.format(train_subject_ids))
    print('Validation: {}'.format(val_subject_ids))
    print('Test: {}'.format(test_subject_ids))
    (train_X, train_y, train_dct, train_vidlens, train_subjects,
     val_X, val_y, val_dct, val_vidlens, val_subjects,
     test_X, test_y, test_dct, test_vidlens, test_subjects) = split_data(
        X, y, dct_feats, subjects, video_lens,
        train_subject_ids, val_subject_ids, test_subject_ids)

    # sanity check: the three splits together cover every row exactly once
    assert train_X.shape[0] + val_X.shape[0] + test_X.shape[0] == len(X)
    assert train_y.shape[0] + val_y.shape[0] + test_y.shape[0] == len(y)
    assert train_vidlens.shape[0] + val_vidlens.shape[0] + test_vidlens.shape[0] == len(video_lens)
    assert train_subjects.shape[0] + val_subjects.shape[0] + test_subjects.shape[0] == len(subjects)

    train_X = normalize_input(train_X, centralize=True)
    val_X = normalize_input(val_X, centralize=True)
    test_X = normalize_input(test_X, centralize=True)

    # featurewise normalize dct features; validation and test reuse the
    # mean/std returned for the training split
    train_dct, dct_mean, dct_std = featurewise_normalize_sequence(train_dct)
    val_dct = (val_dct - dct_mean) / dct_std
    test_dct = (test_dct - dct_mean) / dct_std

    # IMPT: the encoder was trained with Fortran ordered images, so to visualize
    # convert all the images to C order using reshape_images_order()
    # output = dbn.predict(test_X)
    # test_X = reshape_images_order(test_X, (26, 44))
    # output = reshape_images_order(output, (26, 44))
    # visualize_reconstruction(test_X[:36, :], output[:36, :], shape=(26, 44))

    inputs = T.tensor3('inputs', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.ivector('targets')

    print('constructing end to end model...')

    network = lstm_classifier_baseline.create_model((None, None, no_coeff*3), inputs,
                                                    (None, None), mask,
                                                    250, 10)

    print_network(network)
    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = T.mean(las.objectives.categorical_crossentropy(predictions, targets))
    updates = adam(cost, all_params)

    # same graph twice: `train` applies the Adam updates, the other only
    # reports the (non-deterministic) training cost
    train = theano.function(
        [inputs, targets, mask],
        cost, updates=updates, allow_input_downcast=True)
    compute_train_cost = theano.function([inputs, targets, mask], cost, allow_input_downcast=True)

    # deterministic forward pass for validation/test cost and predictions
    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = T.mean(las.objectives.categorical_crossentropy(test_predictions, targets))
    compute_test_cost = theano.function(
        [inputs, targets, mask], test_cost, allow_input_downcast=True)

    val_fn = theano.function([inputs, mask], test_predictions, allow_input_downcast=True)

    # Train for at most no_epochs epochs of epochsize minibatches each
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    STRIP_SIZE = 3
    val_window = circular_list(validation_window)
    train_strip = np.zeros((STRIP_SIZE,))
    best_val = float('inf')
    best_cr = 0.0
    # Only assigned when the validation cost improves; pre-set them so the
    # final report cannot hit an unbound name if that never happens
    # (e.g. a NaN cost or no_epochs == 0).  nan/None mean "never evaluated".
    test_cr = float('nan')
    test_conf = None

    datagen = gen_lstm_batch_random(train_X, train_y, train_vidlens, batchsize=batchsize)
    # validation and test sets are each drawn as one single batch
    val_datagen = gen_lstm_batch_random(val_X, val_y, val_vidlens, batchsize=len(val_vidlens))
    test_datagen = gen_lstm_batch_random(test_X, test_y, test_vidlens, batchsize=len(test_vidlens))
    integral_lens = compute_integral_len(train_vidlens)

    # We'll use this "validation set" to periodically check progress
    X_val, y_val, mask_val, idxs_val = next(val_datagen)
    integral_lens_val = compute_integral_len(val_vidlens)
    dct_val = gen_seq_batch_from_idx(val_dct, idxs_val, val_vidlens, integral_lens_val, np.max(val_vidlens))

    X_test, y_test, mask_test, idxs_test = next(test_datagen)
    integral_lens_test = compute_integral_len(test_vidlens)
    dct_test = gen_seq_batch_from_idx(test_dct, idxs_test, test_vidlens, integral_lens_test, np.max(test_vidlens))

    for epoch in range(no_epochs):
        time_start = time.time()
        for i in range(epochsize):
            # NOTE: X and y are rebound to the current minibatch from here on;
            # only the DCT batch `d` (not the image batch X) is fed to the net
            X, y, m, batch_idxs = next(datagen)
            d = gen_seq_batch_from_idx(train_dct, batch_idxs,
                                       train_vidlens, integral_lens, np.max(train_vidlens))
            print_str = 'Epoch {} batch {}/{}: {} examples using adam'.format(
                epoch + 1, i + 1, epochsize, len(X))
            print(print_str, end='')
            sys.stdout.flush()
            train(d, y, m)
            # carriage return so the next progress line overwrites this one
            print('\r', end='')
        # the training cost is measured on the last minibatch of the epoch
        cost = compute_train_cost(d, y, m)
        val_cost = compute_test_cost(dct_val, y_val, mask_val)
        cost_train.append(cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = cost
        val_window.push(val_cost)

        # gl: validation cost relative to the best validation cost so far
        # pk: mean over min of the last STRIP_SIZE training costs
        # NOTE: train_strip starts as zeros, so until STRIP_SIZE epochs have
        # run its minimum is the initial 0 and pk/pq are not meaningful.
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        pk = 1000 * (np.sum(train_strip) / (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model(dct_val, y_val, mask_val, val_fn)
        class_rate.append(cr)

        if val_cost < best_val:
            # new best validation cost: remember it and score the test set
            best_val = val_cost
            best_cr = cr
            test_cr, test_conf = evaluate_model(dct_test, y_test, mask_test, val_fn)
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f}, Test CR= {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr, test_cr, time.time() - time_start))
        else:
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr, time.time() - time_start))

        if epoch >= validation_window and early_stop2(val_window, best_val, validation_window):
            break

    phrases = ['p1', 'p2', 'p3', 'p4', 'p5', 'p6', 'p7', 'p8', 'p9', 'p10']

    print('Final Model')
    print('CR: {}, val loss: {}, Test CR: {}'.format(best_cr, best_val, test_cr))
    print('confusion matrix: ')
    # there is no confusion matrix if the test set was never evaluated
    if test_conf is not None:
        plot_confusion_matrix(test_conf, phrases, fmt='latex')
    plot_validation_cost(cost_train, cost_val, savefilename='valid_cost')

    if 'write_results' in options:
        results_file = options['write_results']
        with open(results_file, mode='a') as f:
            f.write('{},{}\n'.format(test_cr, best_val))

コード例 #12

ファイルを表示

ファイル: bimodal_diff_image.py プロジェクト: behtak/ip-avsr

def main():
    """Train the raw-image + diff-image fusion network and report the best validation result.

    Settings are read from the config file named by the ``config`` CLI option;
    ``update_rule``, ``learning_rate``, ``decay_rate``, ``decay_start``,
    ``validation_window``, ``t1``, ``num_epoch``, ``weight_init``,
    ``use_peepholes`` and (for sgdm/sgdnm) ``momentum`` and
    ``momentum_schedule`` may be overridden on the command line.  The function

    1. loads the image and diff-image ``.mat`` files and splits them into a
       training part and a held-out part (the held-out part is used as the
       validation set here),
    2. optionally fine-tunes, saves and/or loads the encoders according to the
       ``do_finetune`` / ``save_finetune`` / ``load_finetune`` /
       ``load_finetune_diff`` config flags,
    3. builds the ``adenet_v2_1`` model and trains it with the chosen update
       rule for at most ``num_epoch`` epochs of ``EPOCH_SIZE`` minibatches,
    4. leaves the loop early when ``early_stop2`` returns true (only consulted
       once ``validation_window`` epochs have run),
    5. prints the best figures, plots them unless ``options['no_plot']`` is
       set, and appends them to the ``write_results`` file when given.

    Raises:
        ValueError: if ``update_rule`` or ``model`` is not one of the values
            handled below.
    """
    configure_theano()
    options = parse_options()
    config_file = options['config']
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    print('CLI options: {}'.format(options.items()))

    print('Reading Config File: {}...'.format(config_file))
    print(config.items('data'))
    print(config.items('models'))
    print(config.items('training'))

    print('preprocessing dataset...')
    data = load_mat_file(config.get('data', 'images'))
    diff_data = load_mat_file(config.get('data', 'diff'))
    ae_pretrained = config.get('models', 'pretrained')
    ae_finetuned = config.get('models', 'finetuned')
    ae_finetuned_diff = config.get('models', 'finetuned_diff')
    fusiontype = config.get('models', 'fusiontype')
    do_finetune = config.getboolean('training', 'do_finetune')
    save_finetune = config.getboolean('training', 'save_finetune')
    load_finetune = config.getboolean('training', 'load_finetune')
    load_finetune_diff = config.getboolean('training', 'load_finetune_diff')
    model = config.get('models', 'model')

    # capture training parameters (CLI values take precedence over the config)
    update_rule = options['update_rule'] if 'update_rule' in options else config.get('training', 'update_rule')
    # Fail with a clear message instead of an unbound `updates`/`print_str`
    # further down when the rule is not one of the handled ones.
    if update_rule not in ('adadelta', 'sgdm', 'sgdnm', 'adam'):
        raise ValueError('unsupported update_rule: {!r}'.format(update_rule))
    uses_momentum = update_rule in ('sgdm', 'sgdnm')
    learning_rate = float(options['learning_rate']) \
        if 'learning_rate' in options else config.getfloat('training', 'learning_rate')
    decay_rate = float(options['decay_rate']) if 'decay_rate' in options else config.getfloat('training', 'decay_rate')
    decay_start = int(options['decay_start']) if 'decay_start' in options else config.getint('training', 'decay_start')
    validation_window = int(options['validation_window']) \
        if 'validation_window' in options else config.getint('training', 'validation_window')
    t1 = int(options['t1']) if 't1' in options else config.getint('training', 't1')
    num_epoch = int(options['num_epoch']) if 'num_epoch' in options else config.getint('training', 'num_epoch')
    weight_init = options['weight_init'] if 'weight_init' in options else config.get('training', 'weight_init')
    use_peepholes = options['use_peepholes'] if 'use_peepholes' in options else config.getboolean('training',
                                                                                                  'use_peepholes')

    # `momentum` is written to the results file for every update rule, so it
    # must be bound even when the rule does not use it.
    momentum = None
    mm_schedule = []
    if uses_momentum:
        momentum = float(options['momentum']) if 'momentum' in options else config.getfloat('training', 'momentum')
        momentum_schedule = options['momentum_schedule'] \
            if 'momentum_schedule' in options else config.get('training', 'momentum_schedule')
        mm_schedule = [float(m) for m in momentum_schedule.split(',')]

    # orthogonal init is the fallback for unrecognised weight_init names
    weight_init_fn = las.init.Orthogonal()
    if weight_init == 'glorot':
        weight_init_fn = las.init.GlorotUniform()
    elif weight_init == 'norm':
        weight_init_fn = las.init.Normal(0.1)
    elif weight_init == 'uniform':
        weight_init_fn = las.init.Uniform()
    elif weight_init == 'ortho':
        weight_init_fn = las.init.Orthogonal()

    # create the necessary variable mappings
    data_matrix = data['dataMatrix']
    data_matrix_len = data_matrix.shape[0]
    targets_vec = data['targetsVec']
    vid_len_vec = data['videoLengthVec']
    iter_vec = data['iterVec']
    diff_data_matrix = diff_data['dataMatrix']

    # samplewise normalize
    # print('sameplewise mean normalize...')
    # data_matrix = normalize_input(data_matrix)
    # diff_data_matrix = normalize_input(diff_data_matrix)
    # diff_data_matrix = compute_diff_images(data_matrix, vid_len_vec.reshape((-1,))).astype('float32')

    # mean remove
    # dct_feats = dct_feats[:, 0:30]
    # dct_feats = sequencewise_mean_image_subtraction(dct_feats, vid_len_vec.reshape((-1,)))

    indexes = create_split_index(data_matrix_len, vid_len_vec, iter_vec)
    train_vidlen_vec, test_vidlen_vec = split_videolen(vid_len_vec, iter_vec)
    assert len(train_vidlen_vec) == 520
    assert len(test_vidlen_vec) == 260
    assert np.sum(vid_len_vec) == data_matrix_len

    # split the data; the two row masks are computed once and reused.  The
    # explicit comparisons are kept because the dtype of `indexes` is not
    # visible here.
    is_train = (indexes == True)  # noqa: E712
    is_test = (indexes == False)  # noqa: E712
    train_data = data_matrix[is_train]
    train_targets = targets_vec[is_train]
    train_targets = train_targets.reshape((len(train_targets),))
    test_data = data_matrix[is_test]
    test_targets = targets_vec[is_test]
    test_targets = test_targets.reshape((len(test_targets),))
    train_diff_data = diff_data_matrix[is_train]
    test_diff_data = diff_data_matrix[is_test]

    if do_finetune:
        print('fine-tuning...')
        ae = load_dbn(ae_pretrained)
        ae.initialize()
        ae.fit(train_data, train_data)
        res = ae.predict(test_data)
        # print(res.shape)
        visualize_reconstruction(test_data[300:336], res[300:336])

    if save_finetune:
        # NOTE(review): `ae` only exists here when do_finetune is also set.
        with open(ae_finetuned, 'wb') as model_file:
            pickle.dump(ae, model_file)

    # NOTE(security): pickle.load executes arbitrary code from the file; only
    # point the `finetuned*` config entries at trusted model files.
    if load_finetune:
        print('loading pre-trained encoding layers...')
        with open(ae_finetuned, 'rb') as model_file:
            ae = pickle.load(model_file)
        ae.initialize()

    if load_finetune_diff:
        print('loading pre-trained diff image encoding layers...')
        with open(ae_finetuned_diff, 'rb') as model_file:
            diff_ae = pickle.load(model_file)
        diff_ae.initialize()

    # hard-wired switch: the convolutional encoder path is currently disabled
    load_convae = False
    if load_convae:
        print('loading pre-trained convolutional autoencoder...')
        encoder = load_model('models/conv_encoder_norm.dat')
        inputs_raw = las.layers.get_all_layers(encoder)[0].input_var
    else:
        inputs_raw = T.tensor3('inputs_raw', dtype='float32')
    # needed by the model regardless of where inputs_raw comes from
    inputs_diff = T.tensor3('inputs_diff', dtype='float32')

    window = T.iscalar('theta')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.ivector('targets')
    # shared variable so the learning rate can be decayed between epochs
    lr = theano.shared(np.array(learning_rate, dtype=theano.config.floatX), name='learning_rate')
    lr_decay = np.array(decay_rate, dtype=theano.config.floatX)

    if uses_momentum:
        mm = theano.shared(np.array(momentum, dtype=theano.config.floatX), name='momentum')

    print('constructing end to end model...')

    if model == 'adenet_v2_1':
        network, l_fuse = adenet_v2_1.create_model(ae, diff_ae, (None, None, 1200), inputs_raw,
                                                   (None, None), mask,
                                                   (None, None, 1200), inputs_diff,
                                                   250, window, 26, fusiontype,
                                                   w_init_fn=weight_init_fn,
                                                   use_peepholes=use_peepholes)
    else:
        # previously fell through to an unbound `network`
        raise ValueError('unsupported model: {!r}'.format(model))

    print_network(network)
    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = T.mean(las.objectives.categorical_crossentropy(predictions, targets))
    if update_rule == 'adadelta':
        updates = las.updates.adadelta(cost, all_params, learning_rate=lr)
    elif update_rule == 'sgdm':
        updates = las.updates.sgd(cost, all_params, learning_rate=lr)
        updates = las.updates.apply_momentum(updates, all_params, momentum=mm)
    elif update_rule == 'sgdnm':
        updates = las.updates.sgd(cost, all_params, learning_rate=lr)
        updates = las.updates.apply_nesterov_momentum(updates, all_params, momentum=mm)
    else:  # 'adam' (validated above); runs with its default parameters
        updates = las.updates.adam(cost, all_params)

    # same graph twice: `train` applies the updates, the other only reports
    # the (non-deterministic) training cost
    train = theano.function(
        [inputs_raw, targets, mask, inputs_diff, window],
        cost, updates=updates, allow_input_downcast=True)
    compute_train_cost = theano.function([inputs_raw, targets, mask, inputs_diff, window],
                                         cost, allow_input_downcast=True)

    # deterministic forward pass for validation cost and predictions
    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = T.mean(las.objectives.categorical_crossentropy(test_predictions, targets))
    compute_test_cost = theano.function(
        [inputs_raw, targets, mask, inputs_diff, window], test_cost, allow_input_downcast=True)

    val_fn = theano.function([inputs_raw, mask, inputs_diff, window], test_predictions, allow_input_downcast=True)

    # Train for at most num_epoch epochs of EPOCH_SIZE minibatches each
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    EPOCH_SIZE = 20
    BATCH_SIZE = 26
    WINDOW_SIZE = 9
    STRIP_SIZE = 3
    val_window = circular_list(validation_window)
    train_strip = np.zeros((STRIP_SIZE,))
    best_val = float('inf')
    best_conf = None
    best_cr = 0.0

    datagen = gen_lstm_batch_random(train_data, train_targets, train_vidlen_vec, batchsize=BATCH_SIZE)
    # the held-out split doubles as the validation set, drawn as one batch
    val_datagen = gen_lstm_batch_random(test_data, test_targets, test_vidlen_vec,
                                        batchsize=len(test_vidlen_vec))
    integral_lens = compute_integral_len(train_vidlen_vec)

    # We'll use this "validation set" to periodically check progress
    X_val, y_val, mask_val, idxs_val = next(val_datagen)
    integral_lens_val = compute_integral_len(test_vidlen_vec)
    diff_val = gen_seq_batch_from_idx(test_diff_data, idxs_val,
                                      test_vidlen_vec, integral_lens_val, np.max(test_vidlen_vec))

    for epoch in range(num_epoch):
        time_start = time.time()
        for i in range(EPOCH_SIZE):
            X, y, m, batch_idxs = next(datagen)
            diff = gen_seq_batch_from_idx(train_diff_data, batch_idxs,
                                          train_vidlen_vec, integral_lens, np.max(train_vidlen_vec))
            if update_rule == 'adam':
                print_str = 'Epoch {} batch {}/{}: {} examples with {} using default params'.format(
                    epoch + 1, i + 1, EPOCH_SIZE, len(X), update_rule)
            elif update_rule == 'adadelta':
                print_str = 'Epoch {} batch {}/{}: {} examples at learning rate = {:.4f} with {}'.format(
                    epoch + 1, i + 1, EPOCH_SIZE, len(X), float(lr.get_value()), update_rule)
            else:  # sgdm / sgdnm
                print_str = ('Epoch {} batch {}/{}: {} examples at learning rate = {:.4f}, '
                             'momentum = {:.4f} with {}').format(
                    epoch + 1, i + 1, EPOCH_SIZE, len(X), float(lr.get_value()), float(mm.get_value()), update_rule)
            print(print_str, end='')
            sys.stdout.flush()
            train(X, y, m, diff, WINDOW_SIZE)
            # carriage return so the next progress line overwrites this one
            print('\r', end='')
        # the training cost is measured on the last minibatch of the epoch
        cost = compute_train_cost(X, y, m, diff, WINDOW_SIZE)
        val_cost = compute_test_cost(X_val, y_val, mask_val, diff_val, WINDOW_SIZE)
        cost_train.append(cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = cost
        val_window.push(val_cost)

        # gl: validation cost relative to the best validation cost so far
        # pk: mean over min of the last STRIP_SIZE training costs
        # NOTE: train_strip starts as zeros, so until STRIP_SIZE epochs have
        # run its minimum is the initial 0 and pk/pq are not meaningful.
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        pk = 1000 * (np.sum(train_strip) / (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model(X_val, y_val, mask_val, diff_val, WINDOW_SIZE, val_fn)
        class_rate.append(cr)

        print("Epoch {} train cost = {}, validation cost = {}, "
              "generalization loss = {:.3f}, GQ = {:.3f}, classification rate = {:.3f} ({:.1f}sec)"
              .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr, time.time() - time_start))

        if val_cost < best_val:
            best_val = val_cost
            best_conf = val_conf
            best_cr = cr
        else:
            # no improvement: after epoch t1, shrink the learning rate (floored
            # at 0.001) and move to the next scheduled momentum value, if any
            if epoch >= t1 and uses_momentum:
                lr.set_value(max(lr.get_value() * lr_decay, 0.001))
                if mm_schedule:
                    mm.set_value(mm_schedule.pop(0))

        if epoch >= validation_window and early_stop2(val_window, best_val, validation_window):
            break

        # learning rate decay
        # NOTE(review): this runs in addition to the no-improvement decay
        # above, so for sgdm/sgdnm both may apply in one epoch -- confirm.
        if epoch + 1 >= decay_start:
            lr.set_value(lr.get_value() * lr_decay)

    letters = ['a', 'b', 'c', 'd', 'e', 'f', 'g',
               'h', 'i', 'j', 'k', 'l', 'm', 'n',
               'o', 'p', 'q', 'r', 's', 't', 'u',
               'v', 'w', 'x', 'y', 'z']

    print('Best Model')
    print('classification rate: {}, validation loss: {}'.format(best_cr, best_val))
    if fusiontype == 'adasum':
        adascale_param = las.layers.get_all_param_values(l_fuse, scaling_param=True)
        print("final scaling params: {}".format(adascale_param))
    print('confusion matrix: ')
    if not options['no_plot']:
        plot_confusion_matrix(best_conf, letters, fmt='latex')
        plot_validation_cost(cost_train, cost_val, class_rate, 'e2e_valid_cost')

    if 'write_results' in options:
        results_file = options['write_results']
        # append: hyper-parameters, the three per-epoch curves, then the summary
        with open(results_file, mode='a') as f:
            f.write('{},{},{},{},{},{},{},{},{}\n'.format(update_rule, learning_rate, decay_rate, momentum,
                                                          decay_start, t1, validation_window,
                                                          weight_init, use_peepholes))

            s = ','.join([str(v) for v in cost_train])
            f.write('{}\n'.format(s))

            s = ','.join([str(v) for v in cost_val])
            f.write('{}\n'.format(s))

            s = ','.join([str(v) for v in class_rate])
            f.write('{}\n'.format(s))

            f.write('{},{},{}\n'.format(fusiontype, best_cr, best_val))

コード例 #13

ファイルを表示

ファイル: unimodal_dct_with_val.py プロジェクト: behtak/ip-avsr

def main():
    """Train an LSTM classifier on DCT features, with validation-based early stopping.

    Settings are read from the config file named by the ``config`` CLI option;
    several training settings can be overridden by CLI options of the same
    name.  The DCT features are normalized with the mean/std returned for the
    training features, the majority-vote LSTM classifier is built and trained
    with adam.  After every epoch the validation cost and classification rate
    are computed; whenever the validation cost improves, the test set is
    evaluated too.  Training ends after ``no_epochs`` epochs or as soon as
    ``early_stop2`` returns true.  Finally the test confusion matrix and the
    cost curves are plotted and, when the ``write_results`` option is present,
    a summary plus the per-epoch curves are appended to that file.
    """
    configure_theano()
    options = parse_options()
    config_file = options['config']
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    print('Reading Config File: {}...'.format(config_file))
    print(config.items('data'))
    print(config.items('models'))
    print(config.items('training'))

    print('CLI options: {}'.format(options.items()))

    print('preprocessing dataset...')
    data = load_mat_file(config.get('data', 'images'))
    dct_data = load_mat_file(config.get('data', 'dct'))
    no_coeff = config.getint('models', 'no_coeff')
    output_classes = config.getint('models', 'output_classes')
    lstm_size = config.getint('models', 'lstm_size')

    # capture training parameters; a CLI option wins over the config file
    # NOTE(review): only validation_window and no_epochs are cast when they
    # come from the CLI; the other overrides are used as parse_options()
    # returns them -- confirm that it already yields the right types.
    validation_window = int(options['validation_window']) \
        if 'validation_window' in options else config.getint('training', 'validation_window')
    no_epochs = int(options['no_epochs']) if 'no_epochs' in options else config.getint('training', 'no_epochs')
    weight_init = options['weight_init'] if 'weight_init' in options else config.get('training', 'weight_init')
    learning_rate = options['learning_rate'] if 'learning_rate' in options \
        else config.getfloat('training', 'learning_rate')
    epochsize = options['epochsize'] if 'epochsize' in options else config.getint('training', 'epochsize')
    batchsize = options['batchsize'] if 'batchsize' in options else config.getint('training', 'batchsize')
    use_peepholes = options['use_peepholes'] if 'use_peepholes' in options \
        else config.getboolean('training', 'use_peepholes')
    # these two are only echoed into the results file below
    use_blstm = config.getboolean('training', 'use_blstm')
    use_finetuning = config.getboolean('training', 'use_finetuning')

    # any unrecognised weight_init value falls back to Glorot uniform
    weight_init_fn = las.init.GlorotUniform()
    if weight_init == 'glorot':
        weight_init_fn = las.init.GlorotUniform()
    if weight_init == 'norm':
        weight_init_fn = las.init.Normal(0.1)
    if weight_init == 'uniform':
        weight_init_fn = las.init.Uniform()
    if weight_init == 'ortho':
        weight_init_fn = las.init.Orthogonal()

    train_vidlens = data['trVideoLengthVec'].astype('int').reshape((-1,))
    val_vidlens = data['valVideoLengthVec'].astype('int').reshape((-1,))
    test_vidlens = data['testVideoLengthVec'].astype('int').reshape((-1,))
    train_X = data['trData'].astype('float32')
    val_X = data['valData'].astype('float32')
    test_X = data['testData'].astype('float32')
    train_dct = dct_data['trDctFeatures'].astype('float32')
    val_dct = dct_data['valDctFeatures'].astype('float32')
    test_dct = dct_data['testDctFeatures'].astype('float32')
    # +1 to handle the -1 introduced in lstm_gendata
    train_y = data['trTargetsVec'].astype('int').reshape((-1,)) + 1
    val_y = data['valTargetsVec'].astype('int').reshape((-1,)) + 1
    test_y = data['testTargetsVec'].astype('int').reshape((-1,)) + 1

    # featurewise normalize dct features; validation and test data reuse the
    # mean/std returned for the training features
    train_dct, dct_mean, dct_std = featurewise_normalize_sequence(train_dct)
    val_dct = (val_dct - dct_mean) / dct_std
    test_dct = (test_dct - dct_mean) / dct_std

    inputs = T.tensor3('inputs', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.imatrix('targets')

    print('constructing end to end model...')
    network = lstm_classifier_majority_vote.create_model((None, None, no_coeff*3), inputs,
                                                         (None, None), mask,
                                                         lstm_size, output_classes, w_init=weight_init_fn)

    print_network(network)
    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = temporal_softmax_loss(predictions, targets, mask)
    # NOTE(review): learning_rate is written to the results file but is not
    # passed to adam() here, so the optimizer runs with whatever default
    # adam() uses -- confirm whether that is intended.
    updates = adam(cost, all_params)

    train = theano.function(
        [inputs, targets, mask],
        cost, updates=updates, allow_input_downcast=True)
    compute_train_cost = theano.function([inputs, targets, mask], cost, allow_input_downcast=True)

    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = temporal_softmax_loss(test_predictions, targets, mask)
    compute_test_cost = theano.function(
        [inputs, targets, mask], test_cost, allow_input_downcast=True)

    val_fn = theano.function([inputs, mask], test_predictions, allow_input_downcast=True)

    # Train for up to no_epochs epochs of epochsize minibatches each
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    STRIP_SIZE = 3
    val_window = circular_list(validation_window)
    train_strip = np.zeros((STRIP_SIZE,))
    best_val = float('inf')
    best_tr = float('inf')
    best_cr = 0.0
    # Test results are only produced when the validation cost improves; give
    # them defined values so the final report cannot hit a NameError when no
    # epoch ever improves (e.g. the cost is NaN, or no_epochs is 0).
    test_cr = float('nan')
    test_conf = None

    # The image data only drives batch/index selection here; the network
    # itself is fed the DCT sequences gathered for the same indices.
    datagen = gen_lstm_batch_random(train_X, train_y, train_vidlens, batchsize=batchsize)
    val_datagen = gen_lstm_batch_random(val_X, val_y, val_vidlens, batchsize=len(val_vidlens))
    test_datagen = gen_lstm_batch_random(test_X, test_y, test_vidlens, batchsize=len(test_vidlens))
    integral_lens = compute_integral_len(train_vidlens)

    # We'll use this "validation set" to periodically check progress
    X_val, y_val, mask_val, idxs_val = next(val_datagen)
    integral_lens_val = compute_integral_len(val_vidlens)
    dct_val = gen_seq_batch_from_idx(val_dct, idxs_val, val_vidlens, integral_lens_val, np.max(val_vidlens))

    X_test, y_test, mask_test, idxs_test = next(test_datagen)
    integral_lens_test = compute_integral_len(test_vidlens)
    dct_test = gen_seq_batch_from_idx(test_dct, idxs_test, test_vidlens, integral_lens_test, np.max(test_vidlens))

    # reshape the targets for validation: keep the per-sequence labels for
    # evaluate_model2 and repeat them along the time axis for the loss
    y_val_evaluate = y_val
    y_val = y_val.reshape((-1, 1)).repeat(mask_val.shape[-1], axis=-1)

    for epoch in range(no_epochs):
        time_start = time.time()
        for i in range(epochsize):
            _, y, m, batch_idxs = next(datagen)
            # repeat targets based on max sequence len
            y = y.reshape((-1, 1))
            y = y.repeat(m.shape[-1], axis=-1)
            d = gen_seq_batch_from_idx(train_dct, batch_idxs, train_vidlens, integral_lens, np.max(train_vidlens))
            print_str = 'Epoch {} batch {}/{}: {} examples using adam'.format(
                epoch + 1, i + 1, epochsize, len(y))
            print(print_str, end='')
            sys.stdout.flush()
            train(d, y, m)
            print('\r', end='')
        # the training cost is measured on the last minibatch of the epoch only
        cost = compute_train_cost(d, y, m)
        val_cost = compute_test_cost(dct_val, y_val, mask_val)
        cost_train.append(cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = cost
        val_window.push(val_cost)

        # gl: increase of the current validation cost over the best seen so far (%)
        # pk: training cost over the last STRIP_SIZE epochs relative to its minimum
        # train_strip starts as zeros, so its minimum is 0 until STRIP_SIZE
        # epochs have run; pk and the printed GQ are not meaningful before then.
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        pk = 1000 * (np.sum(train_strip) / (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model2(dct_val, y_val_evaluate, mask_val, val_fn)
        class_rate.append(cr)

        if val_cost < best_val:
            # new best validation cost: remember it and score the test set
            best_val = val_cost
            best_tr = cost
            best_cr = cr
            test_cr, test_conf = evaluate_model2(dct_test, y_test, mask_test, val_fn)
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f}, Test CR= {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr, test_cr, time.time() - time_start))
        else:
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr, time.time() - time_start))

        # early stopping is only considered once validation_window epochs have run
        if epoch >= validation_window and early_stop2(val_window, best_val, validation_window):
            break

    numbers = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']

    print('Final Model')
    print('CR: {}, val loss: {}, Test CR: {}'.format(best_cr, best_val, test_cr))
    print('confusion matrix: ')
    if test_conf is not None:
        plot_confusion_matrix(test_conf, numbers, fmt='latex')
    else:
        print('not available: the validation cost never improved, so the test set was not evaluated')
    plot_validation_cost(cost_train, cost_val, savefilename='valid_cost')

    if 'write_results' in options:
        results_file = options['write_results']
        with open(results_file, mode='a') as f:
            # summary line, followed by one line per curve
            f.write('{},{},{},{},{},{},{},{},{},{},{},{}\n'.format(use_finetuning, 'yes', use_peepholes,
                                                                'adam', weight_init, 'N/A',
                                                                use_blstm, learning_rate, best_tr,
                                                                best_val, best_cr*100, test_cr*100))

            s = ','.join([str(v) for v in cost_train])
            f.write('{}\n'.format(s))

            s = ','.join([str(v) for v in cost_val])
            f.write('{}\n'.format(s))

            s = ','.join([str(v) for v in class_rate])
            f.write('{}\n'.format(s))

コード例 #14

ファイルを表示

ファイル: bimodal_diff_with_val.py プロジェクト: behtak/ip-avsr

def main():
    """Train the two-stream (raw + difference image) LSTM model with early stopping.

    Settings are read from the config file named by the ``config`` CLI option;
    some training settings can be overridden by CLI options of the same name.
    The data is split by subject using the id lists in ``data/train.txt``,
    ``data/val.txt`` and ``data/test.txt``, difference images are computed per
    split, and the two pretrained encoders are loaded and combined by
    ``adenet_v2_2.create_model``.  The network is trained with adam; after
    every epoch the validation cost and classification rate are computed and,
    whenever the validation cost improves, the test set is evaluated too.
    Training ends after ``num_epoch`` epochs or as soon as ``early_stop2``
    returns true.  Finally the test confusion matrix and cost curves are
    plotted and, when the ``write_results`` option is present, one summary
    line is appended to that file.
    """
    configure_theano()
    options = parse_options()
    config_file = options['config']
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    print('CLI options: {}'.format(options.items()))

    print('Reading Config File: {}...'.format(config_file))
    print(config.items('data'))
    print(config.items('models'))
    print(config.items('training'))

    print('preprocessing dataset...')
    data = load_mat_file(config.get('data', 'images'))
    ae_pretrained = config.get('models', 'pretrained')
    ae_pretrained_diff = config.get('models', 'pretrained_diff')
    fusiontype = config.get('models', 'fusiontype')

    # capture training parameters; a CLI option wins over the config file
    # NOTE(review): only validation_window and num_epoch are cast when they
    # come from the CLI; the other overrides are used as parse_options()
    # returns them -- confirm that it already yields the right types.
    validation_window = int(options['validation_window']) \
        if 'validation_window' in options else config.getint('training', 'validation_window')
    num_epoch = int(options['num_epoch']) if 'num_epoch' in options else config.getint('training', 'num_epoch')
    weight_init = options['weight_init'] if 'weight_init' in options else config.get('training', 'weight_init')
    learning_rate = options['learning_rate'] if 'learning_rate' in options \
        else config.getfloat('training', 'learning_rate')
    use_peepholes = options['use_peepholes'] if 'use_peepholes' in options \
        else config.getboolean('training', 'use_peepholes')
    epochsize = config.getint('training', 'epochsize')
    batchsize = config.getint('training', 'batchsize')
    windowsize = config.getint('training', 'windowsize')

    # any unrecognised weight_init value falls back to Glorot uniform
    weight_init_fn = las.init.GlorotUniform()
    if weight_init == 'glorot':
        weight_init_fn = las.init.GlorotUniform()
    if weight_init == 'norm':
        weight_init_fn = las.init.Normal(0.1)
    if weight_init == 'uniform':
        weight_init_fn = las.init.Uniform()
    if weight_init == 'ortho':
        weight_init_fn = las.init.Orthogonal()

    # subject ids for each split come from fixed, relative paths
    train_subject_ids = read_data_split_file('data/train.txt')
    val_subject_ids = read_data_split_file('data/val.txt')
    test_subject_ids = read_data_split_file('data/test.txt')

    data_matrix = data['dataMatrix']
    targets_vec = data['targetsVec'].reshape((-1,))
    subjects_vec = data['subjectsVec'].reshape((-1,))
    vidlen_vec = data['videoLengthVec'].reshape((-1,))

    # presumably (30, 50) is the frame size in pixels -- TODO confirm
    data_matrix = reorder_data(data_matrix, (30, 50))

    (train_X, train_y, train_vidlens, train_subjects,
     val_X, val_y, val_vidlens, val_subjects,
     test_X, test_y, test_vidlens, test_subjects) = split_seq_data(
        data_matrix, targets_vec, subjects_vec, vidlen_vec,
        train_subject_ids, val_subject_ids, test_subject_ids)

    # difference images are derived before the mean image is subtracted
    train_X_diff = compute_diff_images(train_X, train_vidlens)
    val_X_diff = compute_diff_images(val_X, val_vidlens)
    test_X_diff = compute_diff_images(test_X, test_vidlens)

    train_X = sequencewise_mean_image_subtraction(train_X, train_vidlens)
    val_X = sequencewise_mean_image_subtraction(val_X, val_vidlens)
    test_X = sequencewise_mean_image_subtraction(test_X, test_vidlens)

    ae = load_dbn(ae_pretrained)
    ae_diff = load_dbn(ae_pretrained_diff)

    # IMPT: the encoder was trained with Fortran ordered images, so to visualize
    # convert all the images to C order using reshape_images_order()
    # output = dbn.predict(test_X)
    # test_X = reshape_images_order(test_X, (26, 44))
    # output = reshape_images_order(output, (26, 44))
    # visualize_reconstruction(test_X[:36, :], output[:36, :], shape=(26, 44))

    window = T.iscalar('theta')
    inputs = T.tensor3('inputs', dtype='float32')
    inputs_diff = T.tensor3('inputs_diff', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.imatrix('targets')

    print('constructing end to end model...')
    network, l_fuse = adenet_v2_2.create_model(ae, ae_diff, (None, None, 1500), inputs,
                                               (None, None), mask,
                                               (None, None, 1500), inputs_diff,
                                               250, window, 10, fusiontype,
                                               w_init_fn=weight_init_fn,
                                               use_peepholes=use_peepholes)

    print_network(network)
    # draw_to_file(las.layers.get_all_layers(network), 'network.png')
    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = temporal_softmax_loss(predictions, targets, mask)
    updates = adam(cost, all_params, learning_rate=learning_rate)

    train = theano.function(
        [inputs, targets, mask, inputs_diff, window],
        cost, updates=updates, allow_input_downcast=True)
    compute_train_cost = theano.function([inputs, targets, mask, inputs_diff, window],
                                         cost, allow_input_downcast=True)

    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = temporal_softmax_loss(test_predictions, targets, mask)
    compute_test_cost = theano.function(
        [inputs, targets, mask, inputs_diff, window], test_cost, allow_input_downcast=True)

    val_fn = theano.function([inputs, mask, inputs_diff, window], test_predictions, allow_input_downcast=True)

    # Train for up to num_epoch epochs of epochsize minibatches each
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    STRIP_SIZE = 3
    val_window = circular_list(validation_window)
    train_strip = np.zeros((STRIP_SIZE,))
    best_val = float('inf')
    best_tr = float('inf')
    best_cr = 0.0
    # Test results and the fusion scaling parameters are only captured when
    # the validation cost improves; give them defined values so the final
    # report cannot hit a NameError when no epoch ever improves (e.g. the
    # cost is NaN, or num_epoch is 0).
    test_cr = float('nan')
    test_conf = None
    adascale_param = None

    datagen = gen_lstm_batch_random(train_X, train_y, train_vidlens, batchsize=batchsize)
    integral_lens = compute_integral_len(train_vidlens)

    val_datagen = gen_lstm_batch_random(val_X, val_y, val_vidlens, batchsize=len(val_vidlens))
    test_datagen = gen_lstm_batch_random(test_X, test_y, test_vidlens, batchsize=len(test_vidlens))

    # We'll use this "validation set" to periodically check progress
    X_val, y_val, mask_val, idxs_val = next(val_datagen)
    integral_lens_val = compute_integral_len(val_vidlens)
    X_diff_val = gen_seq_batch_from_idx(val_X_diff, idxs_val, val_vidlens, integral_lens_val, np.max(val_vidlens))

    # we use the test set to check final classification rate
    X_test, y_test, mask_test, idxs_test = next(test_datagen)
    integral_lens_test = compute_integral_len(test_vidlens)
    X_diff_test = gen_seq_batch_from_idx(test_X_diff, idxs_test, test_vidlens, integral_lens_test, np.max(test_vidlens))

    # reshape the targets for validation: keep the per-sequence labels for
    # evaluate_model2 and repeat them along the time axis for the loss
    y_val_evaluate = y_val
    y_val = y_val.reshape((-1, 1)).repeat(mask_val.shape[-1], axis=-1)

    for epoch in range(num_epoch):
        time_start = time.time()
        for i in range(epochsize):
            X, y, m, batch_idxs = next(datagen)
            # repeat targets based on max sequence len
            y = y.reshape((-1, 1))
            y = y.repeat(m.shape[-1], axis=-1)
            # gather the difference images for the same sequences as X
            X_diff = gen_seq_batch_from_idx(train_X_diff, batch_idxs,
                                            train_vidlens, integral_lens, np.max(train_vidlens))
            print_str = 'Epoch {} batch {}/{}: {} examples using adam'.format(
                epoch + 1, i + 1, epochsize, len(X))
            print(print_str, end='')
            sys.stdout.flush()
            train(X, y, m, X_diff, windowsize)
            print('\r', end='')
        # the training cost is measured on the last minibatch of the epoch only
        cost = compute_train_cost(X, y, m, X_diff, windowsize)
        val_cost = compute_test_cost(X_val, y_val, mask_val, X_diff_val, windowsize)
        cost_train.append(cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = cost
        val_window.push(val_cost)

        # gl: increase of the current validation cost over the best seen so far (%)
        # pk: training cost over the last STRIP_SIZE epochs relative to its minimum
        # train_strip starts as zeros, so its minimum is 0 until STRIP_SIZE
        # epochs have run; pk and the printed GQ are not meaningful before then.
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        pk = 1000 * (np.sum(train_strip) / (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model2(X_val, y_val_evaluate, mask_val, X_diff_val, windowsize, val_fn)
        class_rate.append(cr)

        if val_cost < best_val:
            # new best validation cost: remember it and score the test set
            best_val = val_cost
            best_tr = cost
            best_cr = cr
            if fusiontype == 'adasum':
                adascale_param = las.layers.get_all_param_values(l_fuse, scaling_param=True)
            test_cr, test_conf = evaluate_model2(X_test, y_test, mask_test,
                                                 X_diff_test, windowsize, val_fn)
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f}, Test CR= {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr, test_cr, time.time() - time_start))
        else:
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr, time.time() - time_start))

        # early stopping is only considered once validation_window epochs have run
        if epoch >= validation_window and early_stop2(val_window, best_val, validation_window):
            break

    numbers = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']

    print('Final Model')
    print('CR: {}, val loss: {}, Test CR: {}'.format(best_cr, best_val, test_cr))
    if fusiontype == 'adasum':
        print("final scaling params: {}".format(adascale_param))
    print('confusion matrix: ')
    if test_conf is not None:
        plot_confusion_matrix(test_conf, numbers, fmt='latex')
    else:
        print('not available: the validation cost never improved, so the test set was not evaluated')
    plot_validation_cost(cost_train, cost_val, savefilename='valid_cost')

    if 'write_results' in options:
        results_file = options['write_results']
        with open(results_file, mode='a') as f:
            f.write('{},{},{}\n'.format(test_cr, best_cr, best_val))

コード例 #15

ファイルを表示

ファイル: 3stream.py プロジェクト: konatasick/ip-avsr

def main():
    configure_theano()
    options = parse_options()
    config_file = options['config']
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    print('CLI options: {}'.format(options.items()))

    print('Reading Config File: {}...'.format(config_file))
    print(config.items('stream1'))
    print(config.items('stream2'))
    print(config.items('stream3'))
    print(config.items('lstm_classifier'))
    print(config.items('training'))

    print('preprocessing dataset...')

    # stream 1
    s1_data = load_mat_file(config.get('stream1', 'data'))
    s1_imagesize = tuple(
        [int(d) for d in config.get('stream1', 'imagesize').split(',')])
    s1 = config.get('stream1', 'model')
    s1_inputdim = config.getint('stream1', 'input_dimensions')
    s1_shape = config.get('stream1', 'shape')
    s1_nonlinearities = config.get('stream1', 'nonlinearities')

    # stream 2
    s2_data = load_mat_file(config.get('stream2', 'data'))
    s2_imagesize = tuple(
        [int(d) for d in config.get('stream2', 'imagesize').split(',')])
    s2 = config.get('stream2', 'model')
    s2_inputdim = config.getint('stream2', 'input_dimensions')
    s2_shape = config.get('stream2', 'shape')
    s2_nonlinearities = config.get('stream2', 'nonlinearities')

    # stream 3
    s3_data = load_mat_file(config.get('stream3', 'data'))
    s3_imagesize = tuple(
        [int(d) for d in config.get('stream3', 'imagesize').split(',')])
    s3 = config.get('stream3', 'model')
    s3_inputdim = config.getint('stream3', 'input_dimensions')
    s3_shape = config.get('stream3', 'shape')
    s3_nonlinearities = config.get('stream3', 'nonlinearities')

    # lstm classifier
    fusiontype = config.get('lstm_classifier', 'fusiontype')
    weight_init = options[
        'weight_init'] if 'weight_init' in options else config.get(
            'lstm_classifier', 'weight_init')
    use_peepholes = options[
        'use_peepholes'] if 'use_peepholes' in options else config.getboolean(
            'lstm_classifier', 'use_peepholes')
    windowsize = config.getint('lstm_classifier', 'windowsize')
    output_classes = config.getint('lstm_classifier', 'output_classes')
    output_classnames = config.get('lstm_classifier',
                                   'output_classnames').split(',')
    lstm_size = config.getint('lstm_classifier', 'lstm_size')
    matlab_target_offset = config.getboolean('lstm_classifier',
                                             'matlab_target_offset')
    use_dropout = config.getboolean('lstm_classifier', 'use_dropout')

    # capture training parameters
    validation_window = int(options['validation_window']) \
        if 'validation_window' in options else config.getint('training', 'validation_window')
    num_epoch = int(
        options['num_epoch']) if 'num_epoch' in options else config.getint(
            'training', 'num_epoch')
    learning_rate = options['learning_rate'] if 'learning_rate' in options \
        else config.getfloat('training', 'learning_rate')
    epochsize = config.getint('training', 'epochsize')
    batchsize = config.getint('training', 'batchsize')

    weight_init_fn = las.init.GlorotUniform()
    if weight_init == 'glorot':
        weight_init_fn = las.init.GlorotUniform()
    if weight_init == 'norm':
        weight_init_fn = las.init.Normal(0.1)
    if weight_init == 'uniform':
        weight_init_fn = las.init.Uniform()
    if weight_init == 'ortho':
        weight_init_fn = las.init.Orthogonal()

    train_subject_ids = read_data_split_file(
        config.get('training', 'train_subjects_file'))
    val_subject_ids = read_data_split_file(
        config.get('training', 'val_subjects_file'))
    test_subject_ids = read_data_split_file(
        config.get('training', 'test_subjects_file'))

    s1_data_matrix = s1_data['dataMatrix'].astype('float32')
    s2_data_matrix = s2_data['dataMatrix'].astype('float32')
    s3_data_matrix = s3_data['dataMatrix'].astype('float32')

    targets_vec = s1_data['targetsVec'].reshape((-1, ))
    subjects_vec = s1_data['subjectsVec'].reshape((-1, ))
    vidlen_vec = s1_data['videoLengthVec'].reshape((-1, ))

    force_align_data = config.getboolean('stream1', 'force_align_data')

    if matlab_target_offset:
        targets_vec -= 1

    s1_data_matrix = presplit_dataprocessing(s1_data_matrix,
                                             vidlen_vec,
                                             config,
                                             'stream1',
                                             imagesize=s1_imagesize)
    s2_data_matrix = presplit_dataprocessing(s2_data_matrix,
                                             vidlen_vec,
                                             config,
                                             'stream2',
                                             imagesize=s2_imagesize)
    s3_data_matrix = presplit_dataprocessing(s3_data_matrix,
                                             vidlen_vec,
                                             config,
                                             'stream3',
                                             imagesize=s3_imagesize)

    if force_align_data:
        s2_targets_vec = s2_data['targetsVec'].reshape((-1, ))
        s2_vidlen_vec = s2_data['videoLengthVec'].reshape((-1, ))
        s3_targets_vec = s3_data['targetsVec'].reshape((-1, ))
        s3_vidlen_vec = s3_data['videoLengthVec'].reshape((-1, ))
        orig_streams = [
            (s1_data_matrix, targets_vec, vidlen_vec),
            (s2_data_matrix, s2_targets_vec, s2_vidlen_vec),
            (s3_data_matrix, s3_targets_vec, s3_vidlen_vec),
        ]
        new_streams = multistream_force_align(orig_streams)
        s1_data_matrix, targets_vec, vidlen_vec = new_streams[0]
        s2_data_matrix, _, _ = new_streams[1]
        s3_data_matrix, _, _ = new_streams[2]

    s1_train_X, s1_train_y, s1_train_vidlens, s1_train_subjects, \
    s1_val_X, s1_val_y, s1_val_vidlens, s1_val_subjects, \
    s1_test_X, s1_test_y, s1_test_vidlens, s1_test_subjects = split_seq_data(s1_data_matrix, targets_vec, subjects_vec,
                                                                             vidlen_vec, train_subject_ids,
                                                                             val_subject_ids, test_subject_ids)

    s2_train_X, s2_train_y, s2_train_vidlens, s2_train_subjects, \
    s2_val_X, s2_val_y, s2_val_vidlens, s2_val_subjects, \
    s2_test_X, s2_test_y, s2_test_vidlens, s2_test_subjects = split_seq_data(s2_data_matrix, targets_vec, subjects_vec,
                                                                             vidlen_vec, train_subject_ids,
                                                                             val_subject_ids, test_subject_ids)
    s3_train_X, s3_train_y, s3_train_vidlens, s3_train_subjects, \
    s3_val_X, s3_val_y, s3_val_vidlens, s3_val_subjects, \
    s3_test_X, s3_test_y, s3_test_vidlens, s3_test_subjects = split_seq_data(s3_data_matrix, targets_vec, subjects_vec,
                                                                             vidlen_vec, train_subject_ids,
                                                                             val_subject_ids, test_subject_ids)

    s1_train_X, s1_val_X, s1_test_X = postsplit_datapreprocessing(
        s1_train_X, s1_val_X, s1_test_X, config, 'stream1')
    s2_train_X, s2_val_X, s2_test_X = postsplit_datapreprocessing(
        s2_train_X, s2_val_X, s2_test_X, config, 'stream2')
    s3_train_X, s3_val_X, s3_test_X = postsplit_datapreprocessing(
        s3_train_X, s3_val_X, s3_test_X, config, 'stream3')

    ae1 = load_decoder(s1, s1_shape, s1_nonlinearities)
    ae2 = load_decoder(s2, s2_shape, s2_nonlinearities)
    ae3 = load_decoder(s3, s3_shape, s3_nonlinearities)

    # IMPT: the encoder was trained with fortan ordered images, so to visualize
    # convert all the images to C order using reshape_images_order()
    # output = dbn.predict(test_X)
    # test_X = reshape_images_order(test_X, (26, 44))
    # output = reshape_images_order(output, (26, 44))
    # visualize_reconstruction(test_X[:36, :], output[:36, :], shape=(26, 44))

    window = T.iscalar('theta')
    inputs1 = T.tensor3('inputs1', dtype='float32')
    inputs2 = T.tensor3('inputs2', dtype='float32')
    inputs3 = T.tensor3('inputs3', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.imatrix('targets')

    print('constructing end to end model...')
    if use_dropout:
        network, l_fuse = adenet_3stream_dropout.create_model(
            ae1,
            ae2,
            ae3, (None, None, s1_inputdim),
            inputs1, (None, None, s2_inputdim),
            inputs2, (None, None, s3_inputdim),
            inputs3, (None, None),
            mask,
            lstm_size,
            window,
            output_classes,
            fusiontype,
            w_init_fn=weight_init_fn,
            use_peepholes=use_peepholes)
    else:
        network, l_fuse = adenet_3stream.create_model(
            ae1,
            ae2,
            ae3, (None, None, s1_inputdim),
            inputs1, (None, None, s2_inputdim),
            inputs2, (None, None, s3_inputdim),
            inputs3, (None, None),
            mask,
            lstm_size,
            window,
            output_classes,
            fusiontype,
            w_init_fn=weight_init_fn,
            use_peepholes=use_peepholes)

    print_network(network)
    # draw_to_file(las.layers.get_all_layers(network), 'network.png')
    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = temporal_softmax_loss(predictions, targets, mask)
    updates = adam(cost, all_params, learning_rate=learning_rate)

    train = theano.function([inputs1, inputs2, inputs3, targets, mask, window],
                            cost,
                            updates=updates,
                            allow_input_downcast=True)
    compute_train_cost = theano.function(
        [inputs1, inputs2, inputs3, targets, mask, window],
        cost,
        allow_input_downcast=True)

    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = temporal_softmax_loss(test_predictions, targets, mask)
    compute_test_cost = theano.function(
        [inputs1, inputs2, inputs3, targets, mask, window],
        test_cost,
        allow_input_downcast=True)

    val_fn = theano.function([inputs1, inputs2, inputs3, mask, window],
                             test_predictions,
                             allow_input_downcast=True)

    # We'll train the network with 10 epochs of 30 minibatches each
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    STRIP_SIZE = 3
    val_window = circular_list(validation_window)
    train_strip = np.zeros((STRIP_SIZE, ))
    best_val = float('inf')
    best_cr = 0.0

    datagen = gen_lstm_batch_random(s1_train_X,
                                    s1_train_y,
                                    s1_train_vidlens,
                                    batchsize=batchsize)
    integral_lens = compute_integral_len(s1_train_vidlens)

    val_datagen = gen_lstm_batch_random(s1_val_X,
                                        s1_val_y,
                                        s1_val_vidlens,
                                        batchsize=len(s1_val_vidlens))
    test_datagen = gen_lstm_batch_random(s1_test_X,
                                         s1_test_y,
                                         s1_test_vidlens,
                                         batchsize=len(s1_test_vidlens))

    # We'll use this "validation set" to periodically check progress
    X_s1_val, y_val, mask_val, idxs_val = next(val_datagen)
    integral_lens_val = compute_integral_len(s1_val_vidlens)
    X_s2_val = gen_seq_batch_from_idx(s2_val_X, idxs_val,
                                      s1_val_vidlens, integral_lens_val,
                                      np.max(s1_val_vidlens))
    X_s3_val = gen_seq_batch_from_idx(s3_val_X, idxs_val,
                                      s1_val_vidlens, integral_lens_val,
                                      np.max(s1_val_vidlens))

    # we use the test set to check final classification rate
    X_s1_test, y_test, mask_test, idxs_test = next(test_datagen)
    integral_lens_test = compute_integral_len(s1_test_vidlens)
    X_s2_test = gen_seq_batch_from_idx(s2_test_X, idxs_test, s1_test_vidlens,
                                       integral_lens_test,
                                       np.max(s1_test_vidlens))
    X_s3_test = gen_seq_batch_from_idx(s3_test_X, idxs_test, s1_test_vidlens,
                                       integral_lens_test,
                                       np.max(s1_test_vidlens))

    # reshape the targets for validation
    y_val_evaluate = y_val
    y_val = y_val.reshape((-1, 1)).repeat(mask_val.shape[-1], axis=-1)

    for epoch in range(num_epoch):
        time_start = time.time()
        for i in range(epochsize):
            X_s1, y, m, batch_idxs = next(datagen)
            # repeat targets based on max sequence len
            y = y.reshape((-1, 1))
            y = y.repeat(m.shape[-1], axis=-1)
            X_s2 = gen_seq_batch_from_idx(s2_train_X, batch_idxs,
                                          s1_train_vidlens, integral_lens,
                                          np.max(s1_train_vidlens))
            X_s3 = gen_seq_batch_from_idx(s3_train_X, batch_idxs,
                                          s1_train_vidlens, integral_lens,
                                          np.max(s1_train_vidlens))
            print_str = 'Epoch {} batch {}/{}: {} examples using adam with learning rate = {}'.format(
                epoch + 1, i + 1, epochsize, len(X_s1), learning_rate)
            print(print_str, end='')
            sys.stdout.flush()
            train(X_s1, X_s2, X_s3, y, m, windowsize)
            print('\r', end='')
        cost = compute_train_cost(X_s1, X_s2, X_s3, y, m, windowsize)
        val_cost = compute_test_cost(X_s1_val, X_s2_val, X_s3_val, y_val,
                                     mask_val, windowsize)
        cost_train.append(cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = cost
        val_window.push(val_cost)

        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        pk = 1000 * (np.sum(train_strip) /
                     (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model2(X_s1_val, X_s2_val, X_s3_val,
                                       y_val_evaluate, mask_val, windowsize,
                                       val_fn)
        class_rate.append(cr)

        if val_cost < best_val:
            best_val = val_cost
            best_cr = cr
            test_cr, test_conf = evaluate_model2(X_s1_test, X_s2_test,
                                                 X_s3_test, y_test, mask_test,
                                                 windowsize, val_fn)
            print(
                "Epoch {} train cost = {}, val cost = {}, "
                "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f}, Test CR= {:.3f} ({:.1f}sec)"
                .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                        test_cr,
                        time.time() - time_start))
            best_params = las.layers.get_all_param_values(network)
        else:
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f} ({:.1f}sec)".
                  format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                         time.time() - time_start))

        if epoch >= validation_window and early_stop2(val_window, best_val,
                                                      validation_window):
            break

    print('Final Model')
    print('CR: {}, val loss: {}, Test CR: {}'.format(best_cr, best_val,
                                                     test_cr))

    # plot confusion matrix
    table_str = plot_confusion_matrix(test_conf, output_classnames, fmt='pipe')
    print('confusion matrix: ')
    print(table_str)

    if 'save_plot' in options:
        prefix = options['save_plot']
        plot_validation_cost(cost_train,
                             cost_val,
                             savefilename='{}.validloss.png'.format(prefix))
        with open('{}.confmat.txt'.format(prefix), mode='a') as f:
            f.write(table_str)
            f.write('\n\n')

    if 'write_results' in options:
        print('writing results to {}'.format(options['write_results']))
        results_file = options['write_results']
        with open(results_file, mode='a') as f:
            f.write('{},{},{}\n'.format(test_cr, best_cr, best_val))

    if 'save_best' in options:
        print('saving best model...')
        las.layers.set_all_param_values(network, best_params)
        save_model_params(network, options['save_best'])
        print('best model saved to {}'.format(options['save_best']))

コード例 #16

ファイルを表示

ファイル: unimodal.py プロジェクト: konatasick/ip-avsr

def main():
    """Train a single-stream LSTM classifier and report its best epoch.

    The run is driven by the options returned from ``parse_options()`` and by
    the config file named in ``options['config']`` (sections ``stream1``,
    ``lstm_classifier`` and ``training``).  A few values may be overridden
    from the CLI options; everything else comes from the config file.

    Steps: load and split the data, build the network (with or without a
    pretrained encoder), compile the Theano train/cost/prediction functions,
    then train for at most ``num_epoch`` epochs with early stopping.  The
    held-out split is used as the validation set.  At the end, the best
    classification rate and validation loss are printed and the confusion
    matrix and cost curves are plotted.
    """
    configure_theano()
    options = parse_options()
    config_file = options['config']
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    print('CLI options: {}'.format(options.items()))

    print('Reading Config File: {}...'.format(config_file))
    print(config.items('stream1'))
    print(config.items('lstm_classifier'))
    print(config.items('training'))

    print('preprocessing dataset...')
    data = load_mat_file(config.get('stream1', 'data'))
    has_encoder = config.getboolean('stream1', 'has_encoder')
    stream1_dim = config.getint('stream1', 'input_dimensions')
    imagesize = tuple([int(d) for d in config.get('stream1', 'imagesize').split(',')])
    if has_encoder:
        # these three are only needed (and only defined) when an encoder is used
        stream1 = config.get('stream1', 'model')
        stream1_shape = config.get('stream1', 'shape')
        stream1_nonlinearities = config.get('stream1', 'nonlinearities')

    # lstm classifier
    output_classes = config.getint('lstm_classifier', 'output_classes')
    output_classnames = config.get('lstm_classifier', 'output_classnames').split(',')
    lstm_size = config.getint('lstm_classifier', 'lstm_size')
    matlab_target_offset = config.getboolean('lstm_classifier', 'matlab_target_offset')

    # lstm classifier configurations
    weight_init = options['weight_init'] if 'weight_init' in options else config.get('lstm_classifier', 'weight_init')
    # NOTE(review): the CLI value is used as-is here; if parse_options() yields
    # strings, any non-empty value (including 'False') is truthy - confirm.
    use_peepholes = options['use_peepholes'] if 'use_peepholes' in options else config.getboolean('lstm_classifier',
                                                                                                  'use_peepholes')
    # enabled by the mere presence of the option; its value is not read
    use_blstm = config.has_option('lstm_classifier', 'use_blstm')
    windowsize = config.getint('lstm_classifier', 'windowsize')

    # capture training parameters
    validation_window = int(options['validation_window']) \
        if 'validation_window' in options else config.getint('training', 'validation_window')
    num_epoch = int(options['num_epoch']) if 'num_epoch' in options else config.getint('training', 'num_epoch')
    # cast the CLI override like the other numeric options: the value is
    # formatted with '{:.4f}' below and handed to adam, both of which need a number
    learning_rate = float(options['learning_rate']) if 'learning_rate' in options \
        else config.getfloat('training', 'learning_rate')

    epochsize = config.getint('training', 'epochsize')
    batchsize = config.getint('training', 'batchsize')

    # unknown weight_init names fall back to Glorot uniform
    weight_init_fn = las.init.GlorotUniform()
    if weight_init == 'glorot':
        weight_init_fn = las.init.GlorotUniform()
    if weight_init == 'norm':
        weight_init_fn = las.init.Normal(0.1)
    if weight_init == 'uniform':
        weight_init_fn = las.init.Uniform()
    if weight_init == 'ortho':
        weight_init_fn = las.init.Orthogonal()

    data_matrix = data['dataMatrix'].astype('float32')
    targets_vec = data['targetsVec'].reshape((-1,))
    subjects_vec = data['subjectsVec'].reshape((-1,))
    vidlen_vec = data['videoLengthVec'].reshape((-1,))
    iter_vec = data['iterVec'].reshape((-1,))

    data_matrix = presplit_dataprocessing(data_matrix, vidlen_vec, config, 'stream1', imagesize=imagesize)

    indexes = create_split_index(len(data_matrix), vidlen_vec, iter_vec)
    train_vidlen_vec, test_vidlen_vec = split_videolen(vidlen_vec, iter_vec)

    if matlab_target_offset:
        # shift targets down by one when the config flags a MATLAB offset
        targets_vec -= 1

    # split the data
    train_data = data_matrix[indexes == True]
    train_targets = targets_vec[indexes == True]
    train_targets = train_targets.reshape((len(train_targets),))
    test_data = data_matrix[indexes == False]
    test_targets = targets_vec[indexes == False]
    test_targets = test_targets.reshape((len(test_targets),))

    train_data, test_data = postsplit_datapreprocessing(train_data, test_data, config, 'stream1')

    # symbolic inputs of the Theano graph
    inputs = T.tensor3('inputs', dtype='float32')
    window = T.iscalar('theta')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.imatrix('targets')

    print('constructing end to end model...')
    if not has_encoder:
        network = deltanet_v1.create_model((None, None, stream1_dim), inputs,
                                           (None, None), mask, window,
                                           lstm_size, output_classes, weight_init_fn, use_peepholes, use_blstm)
    else:
        ae1 = load_decoder(stream1, stream1_shape, stream1_nonlinearities)
        network = deltanet_majority_vote.create_model(ae1, (None, None, stream1_dim), inputs,
                                                      (None, None), mask,
                                                      lstm_size, window, output_classes, weight_init_fn, use_peepholes)

    print_network(network)
    draw_to_file(las.layers.get_all_layers(network), 'network.png', verbose=True)
    # exit()
    print('compiling model...')
    # stochastic (training-mode) output drives the parameter updates
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = temporal_softmax_loss(predictions, targets, mask)
    updates = las.updates.adam(cost, all_params, learning_rate)

    train = theano.function(
        [inputs, targets, mask, window],
        cost, updates=updates, allow_input_downcast=True)
    compute_train_cost = theano.function([inputs, targets, mask, window], cost, allow_input_downcast=True)

    # deterministic output is used for validation cost and predictions
    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = temporal_softmax_loss(test_predictions, targets, mask)
    compute_test_cost = theano.function(
        [inputs, targets, mask, window], test_cost, allow_input_downcast=True)

    val_fn = theano.function([inputs, mask, window], test_predictions, allow_input_downcast=True)

    # Train for up to num_epoch epochs of epochsize minibatches each
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    STRIP_SIZE = 3
    val_window = circular_list(validation_window)
    train_strip = np.zeros((STRIP_SIZE,))
    best_val = float('inf')
    best_conf = None
    best_cr = 0.0

    datagen = gen_lstm_batch_random(train_data, train_targets, train_vidlen_vec, batchsize=batchsize)
    # a single batch holding every held-out video serves as the validation set
    val_datagen = gen_lstm_batch_random(test_data, test_targets, test_vidlen_vec,
                                        batchsize=len(test_vidlen_vec))

    # We'll use this "validation set" to periodically check progress
    X_val, y_val, mask_val, idxs_val = next(val_datagen)
    # reshape the targets for validation
    y_val_evaluate = y_val
    y_val = y_val.reshape((-1, 1)).repeat(mask_val.shape[-1], axis=-1)

    for epoch in range(num_epoch):
        time_start = time.time()
        for i in range(epochsize):
            X, y, m, batch_idxs = next(datagen)
            # repeat targets based on max sequence len
            y = y.reshape((-1, 1))
            y = y.repeat(m.shape[-1], axis=-1)
            print_str = 'Epoch {} batch {}/{}: {} examples at learning rate = {:.4f}'.format(
                epoch + 1, i + 1, epochsize, len(X), learning_rate)
            print(print_str, end='')
            sys.stdout.flush()
            train(X, y, m, windowsize)
            # return to line start so the next progress message overwrites this one
            print('\r', end='')
        # training cost is measured on the last minibatch of the epoch only
        cost = compute_train_cost(X, y, m, windowsize)
        val_cost = compute_test_cost(X_val, y_val, mask_val, windowsize)
        cost_train.append(cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = cost
        val_window.push(val_cost)

        # gl: percentage by which the current validation cost exceeds the best so far
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        # pk: how far the mean of the recent training strip sits above its minimum;
        # until the strip has been filled once it still contains its initial zeros
        pk = 1000 * (np.sum(train_strip) / (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model2(X_val, y_val_evaluate, mask_val, windowsize, val_fn)
        class_rate.append(cr)

        print("Epoch {} train cost = {}, validation cost = {}, "
              "generalization loss = {:.3f}, GQ = {:.3f}, classification rate = {:.3f} ({:.1f}sec)"
              .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr, time.time() - time_start))

        # remember the metrics of the epoch with the lowest validation cost
        if val_cost < best_val:
            best_val = val_cost
            best_conf = val_conf
            best_cr = cr

        # early stopping is only considered once validation_window epochs have run
        if epoch >= validation_window and early_stop2(val_window, best_val, validation_window):
            break

    print('Best Model')
    print('classification rate: {}, validation loss: {}'.format(best_cr, best_val))
    print('confusion matrix: ')
    plot_confusion_matrix(best_conf, output_classnames, fmt='latex')
    plot_validation_cost(cost_train, cost_val, class_rate)

コード例 #17

ファイルを表示

def main():
    """Train an LSTM classifier on DCT features and log the best epoch.

    The run is driven by the options returned from ``parse_options()`` and by
    the config file named in ``options['config']`` (sections ``data``,
    ``models`` and ``training``).  Several training values may be overridden
    from the CLI options.

    Steps: load the image and DCT ``.mat`` files, normalise the DCT features
    with the training statistics, build and compile the network, then train
    for at most ``no_epochs`` epochs with early stopping.  Whenever the
    validation cost improves, the test set is evaluated as well.  At the end
    the summary is printed, the test confusion matrix and cost curves are
    plotted, and - if ``write_results`` is among the options - a summary row
    plus the per-epoch curves are appended to that file.
    """
    configure_theano()
    options = parse_options()
    config_file = options['config']
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    print('Reading Config File: {}...'.format(config_file))
    print(config.items('data'))
    print(config.items('models'))
    print(config.items('training'))

    print('CLI options: {}'.format(options.items()))

    print('preprocessing dataset...')
    data = load_mat_file(config.get('data', 'images'))
    dct_data = load_mat_file(config.get('data', 'dct'))
    no_coeff = config.getint('models', 'no_coeff')
    output_classes = config.getint('models', 'output_classes')
    lstm_size = config.getint('models', 'lstm_size')

    # capture training parameters
    validation_window = int(options['validation_window']) \
        if 'validation_window' in options else config.getint('training', 'validation_window')
    no_epochs = int(
        options['no_epochs']) if 'no_epochs' in options else config.getint(
            'training', 'no_epochs')
    weight_init = options[
        'weight_init'] if 'weight_init' in options else config.get(
            'training', 'weight_init')
    # NOTE(review): learning_rate is only written to the results file below;
    # it is not passed to adam() - confirm whether that is intended.
    learning_rate = options['learning_rate'] if 'learning_rate' in options \
        else config.getfloat('training', 'learning_rate')
    # cast CLI overrides to int like no_epochs / validation_window above:
    # epochsize feeds range() and batchsize is a batch size
    epochsize = int(
        options['epochsize']) if 'epochsize' in options else config.getint(
            'training', 'epochsize')
    batchsize = int(
        options['batchsize']) if 'batchsize' in options else config.getint(
            'training', 'batchsize')
    # NOTE(review): the CLI value is used as-is; if parse_options() yields
    # strings, any non-empty value is truthy - confirm.
    use_peepholes = options[
        'use_peepholes'] if 'use_peepholes' in options else config.getboolean(
            'training', 'use_peepholes')
    use_blstm = config.getboolean('training', 'use_blstm')
    use_finetuning = config.getboolean('training', 'use_finetuning')

    # unknown weight_init names fall back to Glorot uniform
    weight_init_fn = las.init.GlorotUniform()
    if weight_init == 'glorot':
        weight_init_fn = las.init.GlorotUniform()
    if weight_init == 'norm':
        weight_init_fn = las.init.Normal(0.1)
    if weight_init == 'uniform':
        weight_init_fn = las.init.Uniform()
    if weight_init == 'ortho':
        weight_init_fn = las.init.Orthogonal()

    train_vidlens = data['trVideoLengthVec'].astype('int').reshape((-1, ))
    val_vidlens = data['valVideoLengthVec'].astype('int').reshape((-1, ))
    test_vidlens = data['testVideoLengthVec'].astype('int').reshape((-1, ))
    train_X = data['trData'].astype('float32')
    val_X = data['valData'].astype('float32')
    test_X = data['testData'].astype('float32')
    train_dct = dct_data['trDctFeatures'].astype('float32')
    val_dct = dct_data['valDctFeatures'].astype('float32')
    test_dct = dct_data['testDctFeatures'].astype('float32')
    # +1 to handle the -1 introduced in lstm_gendata
    train_y = data['trTargetsVec'].astype('int').reshape((-1, )) + 1
    val_y = data['valTargetsVec'].astype('int').reshape((-1, )) + 1
    test_y = data['testTargetsVec'].astype('int').reshape((-1, )) + 1

    # featurewise normalize dct features
    # (validation and test features reuse the training mean / std)
    train_dct, dct_mean, dct_std = featurewise_normalize_sequence(train_dct)
    val_dct = (val_dct - dct_mean) / dct_std
    test_dct = (test_dct - dct_mean) / dct_std

    # IMPT: the encoder was trained with fortan ordered images, so to visualize
    # convert all the images to C order using reshape_images_order()
    # output = dbn.predict(test_X)
    # test_X = reshape_images_order(test_X, (26, 44))
    # output = reshape_images_order(output, (26, 44))
    # visualize_reconstruction(test_X[:36, :], output[:36, :], shape=(26, 44))

    # symbolic inputs of the Theano graph
    inputs = T.tensor3('inputs', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.imatrix('targets')

    print('constructing end to end model...')
    network = lstm_classifier_majority_vote.create_model(
        (None, None, no_coeff * 3),
        inputs, (None, None),
        mask,
        lstm_size,
        output_classes,
        w_init=weight_init_fn)

    print_network(network)
    print('compiling model...')
    # stochastic (training-mode) output drives the parameter updates
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = temporal_softmax_loss(predictions, targets, mask)
    updates = adam(cost, all_params)

    train = theano.function([inputs, targets, mask],
                            cost,
                            updates=updates,
                            allow_input_downcast=True)
    compute_train_cost = theano.function([inputs, targets, mask],
                                         cost,
                                         allow_input_downcast=True)

    # deterministic output is used for validation cost and predictions
    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = temporal_softmax_loss(test_predictions, targets, mask)
    compute_test_cost = theano.function([inputs, targets, mask],
                                        test_cost,
                                        allow_input_downcast=True)

    val_fn = theano.function([inputs, mask],
                             test_predictions,
                             allow_input_downcast=True)

    # Train for up to no_epochs epochs of epochsize minibatches each
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    STRIP_SIZE = 3
    val_window = circular_list(validation_window)
    train_strip = np.zeros((STRIP_SIZE, ))
    best_val = float('inf')
    best_tr = float('inf')
    best_cr = 0.0

    datagen = gen_lstm_batch_random(train_X,
                                    train_y,
                                    train_vidlens,
                                    batchsize=batchsize)
    # validation and test sets are each drawn as one batch holding every video
    val_datagen = gen_lstm_batch_random(val_X,
                                        val_y,
                                        val_vidlens,
                                        batchsize=len(val_vidlens))
    test_datagen = gen_lstm_batch_random(test_X,
                                         test_y,
                                         test_vidlens,
                                         batchsize=len(test_vidlens))
    integral_lens = compute_integral_len(train_vidlens)

    # We'll use this "validation set" to periodically check progress
    X_val, y_val, mask_val, idxs_val = next(val_datagen)
    integral_lens_val = compute_integral_len(val_vidlens)
    dct_val = gen_seq_batch_from_idx(val_dct, idxs_val, val_vidlens,
                                     integral_lens_val, np.max(val_vidlens))

    X_test, y_test, mask_test, idxs_test = next(test_datagen)
    integral_lens_test = compute_integral_len(test_vidlens)
    dct_test = gen_seq_batch_from_idx(test_dct, idxs_test, test_vidlens,
                                      integral_lens_test, np.max(test_vidlens))

    # reshape the targets for validation
    y_val_evaluate = y_val
    y_val = y_val.reshape((-1, 1)).repeat(mask_val.shape[-1], axis=-1)

    for epoch in range(no_epochs):
        time_start = time.time()
        for i in range(epochsize):
            # only targets, mask and indices are used; the network is fed DCT features
            _, y, m, batch_idxs = next(datagen)
            # repeat targets based on max sequence len
            y = y.reshape((-1, 1))
            y = y.repeat(m.shape[-1], axis=-1)
            d = gen_seq_batch_from_idx(train_dct, batch_idxs, train_vidlens,
                                       integral_lens, np.max(train_vidlens))
            print_str = 'Epoch {} batch {}/{}: {} examples using adam'.format(
                epoch + 1, i + 1, epochsize, len(y))
            print(print_str, end='')
            sys.stdout.flush()
            train(d, y, m)
            # return to line start so the next progress message overwrites this one
            print('\r', end='')
        # training cost is measured on the last minibatch of the epoch only
        cost = compute_train_cost(d, y, m)
        val_cost = compute_test_cost(dct_val, y_val, mask_val)
        cost_train.append(cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = cost
        val_window.push(val_cost)

        # gl: percentage by which the current validation cost exceeds the best so far
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        # pk: how far the mean of the recent training strip sits above its minimum;
        # until the strip has been filled once it still contains its initial zeros
        pk = 1000 * (np.sum(train_strip) /
                     (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model2(dct_val, y_val_evaluate, mask_val,
                                       val_fn)
        class_rate.append(cr)

        if val_cost < best_val:
            # new best validation cost: record this epoch's metrics, including the
            # training cost (previously best_tr stayed at inf and was logged as such)
            best_val = val_cost
            best_tr = cost
            best_conf = val_conf
            best_cr = cr
            test_cr, test_conf = evaluate_model2(dct_test, y_test, mask_test,
                                                 val_fn)
            print(
                "Epoch {} train cost = {}, val cost = {}, "
                "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f}, Test CR= {:.3f} ({:.1f}sec)"
                .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                        test_cr,
                        time.time() - time_start))
        else:
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f} ({:.1f}sec)".
                  format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                         time.time() - time_start))

        # early stopping is only considered once validation_window epochs have run
        if epoch >= validation_window and early_stop2(val_window, best_val,
                                                      validation_window):
            break

    # class labels used for the confusion matrix
    numbers = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']

    print('Final Model')
    print('CR: {}, val loss: {}, Test CR: {}'.format(best_cr, best_val,
                                                     test_cr))
    print('confusion matrix: ')
    plot_confusion_matrix(test_conf, numbers, fmt='latex')
    plot_validation_cost(cost_train, cost_val, savefilename='valid_cost')

    if 'write_results' in options:
        results_file = options['write_results']
        # append: one summary row followed by the three per-epoch curves
        with open(results_file, mode='a') as f:
            f.write('{},{},{},{},{},{},{},{},{},{},{},{}\n'.format(
                use_finetuning, 'yes', use_peepholes, 'adam', weight_init,
                'N/A', use_blstm, learning_rate, best_tr, best_val,
                best_cr * 100, test_cr * 100))

            s = ','.join([str(v) for v in cost_train])
            f.write('{}\n'.format(s))

            s = ','.join([str(v) for v in cost_val])
            f.write('{}\n'.format(s))

            s = ','.join([str(v) for v in class_rate])
            f.write('{}\n'.format(s))

コード例 #18

ファイルを表示

ファイル: unimodal_dct.py プロジェクト: konatasick/ip-avsr

def main():
    """Train a baseline LSTM classifier on DCT features and report the best epoch.

    The run is driven by the options returned from ``parse_options()`` and by
    the config file named in ``options['config']`` (sections ``data``,
    ``models`` and ``training``).  The DCT file path and the numeric training
    values may be overridden from the CLI options.

    Steps: load the image and DCT ``.mat`` files, split them into a training
    and a held-out part, normalise the DCT features with the training
    statistics, build and compile the network, then train for at most
    ``no_epochs`` epochs with early stopping.  The held-out part is used as the
    validation set.  At the end, the best classification rate and validation
    loss are printed and the confusion matrix and cost curves are plotted.
    """
    configure_theano()
    options = parse_options()
    config_file = options['config']
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    print('CLI options: {}'.format(options.items()))

    print('Reading Config File: {}...'.format(config_file))
    print(config.items('data'))
    print(config.items('models'))
    print(config.items('training'))

    print('preprocessing dataset...')
    data = load_mat_file(config.get('data', 'images'))
    dct_data = load_mat_file(options['dct_data'] if 'dct_data' in
                             options else config.get('data', 'dct'))
    # CLI overrides are cast to int so they behave like the config.getint()
    # fallbacks: no_coeff is multiplied, no_epochs / epochsize feed range(),
    # and validation_window is compared against the epoch counter
    no_coeff = int(options['no_coeff']) if 'no_coeff' in options else config.getint(
        'models', 'no_coeff')
    no_epochs = int(
        options['no_epochs']) if 'no_epochs' in options else config.getint(
            'training', 'no_epochs')
    validation_window = int(options['validation_window']) if 'validation_window' in options \
        else config.getint('training', 'validation_window')
    epochsize = int(
        options['epochsize']) if 'epochsize' in options else config.getint(
            'training', 'epochsize')
    batchsize = int(
        options['batchsize']) if 'batchsize' in options else config.getint(
            'training', 'batchsize')

    # create the necessary variable mappings
    data_matrix = data['dataMatrix'].astype('float32')
    data_matrix_len = data_matrix.shape[0]
    targets_vec = data['targetsVec']
    vid_len_vec = data['videoLengthVec']
    iter_vec = data['iterVec']
    dct_feats = dct_data['dctFeatures'].astype('float32')

    indexes = create_split_index(data_matrix_len, vid_len_vec, iter_vec)
    train_vidlen_vec, test_vidlen_vec = split_videolen(vid_len_vec, iter_vec)
    # sanity checks tied to the expected dataset: 520 training and 260
    # held-out videos, and one data row per video frame
    assert len(train_vidlen_vec) == 520
    assert len(test_vidlen_vec) == 260
    assert np.sum(vid_len_vec) == data_matrix_len

    # split the data
    train_data = data_matrix[indexes == True]
    train_targets = targets_vec[indexes == True]
    train_targets = train_targets.reshape((len(train_targets), ))
    test_data = data_matrix[indexes == False]
    test_targets = targets_vec[indexes == False]
    test_targets = test_targets.reshape((len(test_targets), ))

    # split the dct features
    # (held-out features reuse the training mean / std)
    train_dct = dct_feats[indexes == True].astype(np.float32)
    test_dct = dct_feats[indexes == False].astype(np.float32)
    train_dct, dct_mean, dct_std = featurewise_normalize_sequence(train_dct)
    test_dct = (test_dct - dct_mean) / dct_std

    # symbolic inputs of the Theano graph
    inputs = T.tensor3('inputs', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.ivector('targets')

    print('constructing end to end model...')
    network = lstm_classifier_baseline.create_model(
        (None, None, no_coeff * 3), inputs, (None, None), mask, 250, 26)

    print_network(network)
    draw_to_file(las.layers.get_all_layers(network),
                 'network.png',
                 verbose=True)
    # exit()
    print('compiling model...')
    # stochastic (training-mode) output drives the parameter updates
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = T.mean(las.objectives.categorical_crossentropy(
        predictions, targets))
    updates = las.updates.adam(cost, all_params)

    train = theano.function([inputs, targets, mask],
                            cost,
                            updates=updates,
                            allow_input_downcast=True)
    compute_train_cost = theano.function([inputs, targets, mask],
                                         cost,
                                         allow_input_downcast=True)

    # deterministic output is used for validation cost and predictions
    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = T.mean(
        las.objectives.categorical_crossentropy(test_predictions, targets))
    compute_test_cost = theano.function([inputs, targets, mask],
                                        test_cost,
                                        allow_input_downcast=True)

    val_fn = theano.function([inputs, mask],
                             test_predictions,
                             allow_input_downcast=True)

    # Train for up to no_epochs epochs of epochsize minibatches each
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    STRIP_SIZE = 3
    val_window = circular_list(validation_window)
    train_strip = np.zeros((STRIP_SIZE, ))
    best_val = float('inf')
    best_conf = None
    best_cr = 0.0

    datagen = gen_lstm_batch_random(train_data,
                                    train_targets,
                                    train_vidlen_vec,
                                    batchsize=batchsize)
    # a single batch holding every held-out video serves as the validation set
    val_datagen = gen_lstm_batch_random(test_data,
                                        test_targets,
                                        test_vidlen_vec,
                                        batchsize=len(test_vidlen_vec))
    integral_lens = compute_integral_len(train_vidlen_vec)

    # We'll use this "validation set" to periodically check progress
    X_val, y_val, mask_val, idxs_val = next(val_datagen)
    integral_lens_val = compute_integral_len(test_vidlen_vec)
    dct_val = gen_seq_batch_from_idx(test_dct, idxs_val,
                                     test_vidlen_vec, integral_lens_val,
                                     np.max(test_vidlen_vec))

    for epoch in range(no_epochs):
        time_start = time.time()
        for i in range(epochsize):
            X, y, m, batch_idxs = next(datagen)
            # the network is fed the DCT features matching this batch's indices
            d = gen_seq_batch_from_idx(train_dct, batch_idxs, train_vidlen_vec,
                                       integral_lens, np.max(train_vidlen_vec))
            print_str = 'Epoch {} batch {}/{}: {} examples using adam'.format(
                epoch + 1, i + 1, epochsize, len(X))
            print(print_str, end='')
            sys.stdout.flush()
            train(d, y, m)
            # return to line start so the next progress message overwrites this one
            print('\r', end='')
        # training cost is measured on the last minibatch of the epoch only
        cost = compute_train_cost(d, y, m)
        val_cost = compute_test_cost(dct_val, y_val, mask_val)
        cost_train.append(cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = cost
        val_window.push(val_cost)

        # gl: percentage by which the current validation cost exceeds the best so far
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        # pk: how far the mean of the recent training strip sits above its minimum;
        # until the strip has been filled once it still contains its initial zeros
        pk = 1000 * (np.sum(train_strip) /
                     (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model(dct_val, y_val, mask_val, val_fn)
        class_rate.append(cr)

        print(
            "Epoch {} train cost = {}, validation cost = {}, "
            "generalization loss = {:.3f}, GQ = {:.3f}, classification rate = {:.3f} ({:.1f}sec)"
            .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                    time.time() - time_start))

        # remember the metrics of the epoch with the lowest validation cost
        if val_cost < best_val:
            best_val = val_cost
            best_conf = val_conf
            best_cr = cr

        # early stopping is only considered once validation_window epochs have run
        if epoch >= validation_window and early_stop2(val_window, best_val,
                                                      validation_window):
            break

    # class labels used for the confusion matrix
    letters = [
        'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
        'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'
    ]

    print('Best Model')
    print('classification rate: {}, validation loss: {}'.format(
        best_cr, best_val))
    print('confusion matrix: ')
    plot_confusion_matrix(best_conf, letters, fmt='latex')
    plot_validation_cost(cost_train, cost_val, class_rate)

コード例 #19

ファイルを表示

ファイル: unimodal_dct.py プロジェクト: behtak/ip-avsr

def main():
    """Train and evaluate an LSTM classifier on DCT features.

    Steps, in order:

    1. Read the config file named by the ``config`` CLI option and load the
       image and DCT ``.mat`` files.
    2. Split frames into a training and a test partition; the test partition
       doubles as the validation set during training.
    3. Build the ``lstm_classifier_baseline`` network and compile the Theano
       training / cost / prediction functions (Adam updates).
    4. Train for up to ``no_epochs`` epochs of ``epochsize`` minibatches,
       printing per-epoch statistics and stopping early when
       ``early_stop2`` says so.
    5. Print the best validation result and plot the confusion matrix and
       the cost curves.

    The CLI options ``dct_data``, ``no_coeff``, ``no_epochs``,
    ``validation_window``, ``epochsize`` and ``batchsize`` override the
    matching config entries when present.
    """
    configure_theano()
    options = parse_options()
    config_file = options['config']
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    print('CLI options: {}'.format(options.items()))

    print('Reading Config File: {}...'.format(config_file))
    print(config.items('data'))
    print(config.items('models'))
    print(config.items('training'))

    print('preprocessing dataset...')
    data = load_mat_file(config.get('data', 'images'))
    dct_data = load_mat_file(options['dct_data'] if 'dct_data' in options else config.get('data', 'dct'))
    # Numeric CLI overrides are coerced with int() so that they behave exactly
    # like the config.getint() fallbacks: they feed range(), arithmetic and
    # integer comparisons below, which break (or silently misbehave, e.g.
    # string repetition in no_coeff*3) if the option arrives as a string.
    no_coeff = int(options['no_coeff']) if 'no_coeff' in options \
        else config.getint('models', 'no_coeff')
    no_epochs = int(options['no_epochs']) if 'no_epochs' in options \
        else config.getint('training', 'no_epochs')
    validation_window = int(options['validation_window']) if 'validation_window' in options \
        else config.getint('training', 'validation_window')
    epochsize = int(options['epochsize']) if 'epochsize' in options \
        else config.getint('training', 'epochsize')
    batchsize = int(options['batchsize']) if 'batchsize' in options \
        else config.getint('training', 'batchsize')

    # create the necessary variable mappings
    data_matrix = data['dataMatrix'].astype('float32')
    data_matrix_len = data_matrix.shape[0]
    targets_vec = data['targetsVec']
    vid_len_vec = data['videoLengthVec']
    iter_vec = data['iterVec']
    dct_feats = dct_data['dctFeatures'].astype('float32')

    indexes = create_split_index(data_matrix_len, vid_len_vec, iter_vec)
    train_vidlen_vec, test_vidlen_vec = split_videolen(vid_len_vec, iter_vec)
    # Sanity checks on the expected dataset layout.
    assert len(train_vidlen_vec) == 520
    assert len(test_vidlen_vec) == 260
    assert np.sum(vid_len_vec) == data_matrix_len

    # split the data
    # NOTE: the explicit `== True` / `== False` comparisons are kept on
    # purpose; they produce a mask from the comparison itself instead of
    # relying on the dtype of `indexes` (not visible from here).
    train_data = data_matrix[indexes == True]
    train_targets = targets_vec[indexes == True]
    train_targets = train_targets.reshape((len(train_targets),))
    test_data = data_matrix[indexes == False]
    test_targets = targets_vec[indexes == False]
    test_targets = test_targets.reshape((len(test_targets),))

    # split the dct features; the test features are normalized with the
    # statistics returned for the training features
    train_dct = dct_feats[indexes == True].astype(np.float32)
    test_dct = dct_feats[indexes == False].astype(np.float32)
    train_dct, dct_mean, dct_std = featurewise_normalize_sequence(train_dct)
    test_dct = (test_dct - dct_mean) / dct_std

    inputs = T.tensor3('inputs', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.ivector('targets')

    print('constructing end to end model...')
    # 250 and 26 are presumably the LSTM size and the number of output
    # classes (26 letters are used for the confusion matrix below) --
    # TODO confirm against lstm_classifier_baseline.create_model.
    network = lstm_classifier_baseline.create_model((None, None, no_coeff*3), inputs,
                                                    (None, None), mask,
                                                    250, 26)

    print_network(network)
    draw_to_file(las.layers.get_all_layers(network), 'network.png', verbose=True)
    # exit()
    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = T.mean(las.objectives.categorical_crossentropy(predictions, targets))
    updates = las.updates.adam(cost, all_params)

    train = theano.function(
        [inputs, targets, mask],
        cost, updates=updates, allow_input_downcast=True)
    compute_train_cost = theano.function([inputs, targets, mask], cost, allow_input_downcast=True)

    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = T.mean(las.objectives.categorical_crossentropy(test_predictions, targets))
    compute_test_cost = theano.function(
        [inputs, targets, mask], test_cost, allow_input_downcast=True)

    val_fn = theano.function([inputs, mask], test_predictions, allow_input_downcast=True)

    # Train for up to `no_epochs` epochs of `epochsize` minibatches each.
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    STRIP_SIZE = 3
    val_window = circular_list(validation_window)
    train_strip = np.zeros((STRIP_SIZE,))
    best_val = float('inf')
    best_conf = None
    best_cr = 0.0

    datagen = gen_lstm_batch_random(train_data, train_targets, train_vidlen_vec, batchsize=batchsize)
    val_datagen = gen_lstm_batch_random(test_data, test_targets, test_vidlen_vec,
                                        batchsize=len(test_vidlen_vec))
    integral_lens = compute_integral_len(train_vidlen_vec)

    # We'll use this "validation set" to periodically check progress.
    # It is drawn once, as a single batch covering every test video.
    X_val, y_val, mask_val, idxs_val = next(val_datagen)
    integral_lens_val = compute_integral_len(test_vidlen_vec)
    dct_val = gen_seq_batch_from_idx(test_dct, idxs_val, test_vidlen_vec, integral_lens_val, np.max(test_vidlen_vec))

    for epoch in range(no_epochs):
        time_start = time.time()
        for i in range(epochsize):
            X, y, m, batch_idxs = next(datagen)
            # Only the DCT sequences are fed to the network; X is used just
            # for the batch size in the progress line.
            d = gen_seq_batch_from_idx(train_dct, batch_idxs,
                                       train_vidlen_vec, integral_lens, np.max(train_vidlen_vec))
            print_str = 'Epoch {} batch {}/{}: {} examples using adam'.format(
                epoch + 1, i + 1, epochsize, len(X))
            print(print_str, end='')
            sys.stdout.flush()
            train(d, y, m)
            print('\r', end='')
        # The training cost is measured on the last minibatch of the epoch.
        cost = compute_train_cost(d, y, m)
        val_cost = compute_test_cost(dct_val, y_val, mask_val)
        cost_train.append(cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = cost
        val_window.push(val_cost)

        # gl: how far (in %) the latest validation cost is above the best one.
        # pk: how much the recent training strip exceeds its own minimum.
        # NOTE: train_strip starts as zeros, so until STRIP_SIZE epochs have
        # run its minimum is 0 and pk divides by zero (NumPy is expected to
        # give inf/nan with a warning rather than raise).
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        pk = 1000 * (np.sum(train_strip) / (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model(dct_val, y_val, mask_val, val_fn)
        class_rate.append(cr)

        print("Epoch {} train cost = {}, validation cost = {}, "
              "generalization loss = {:.3f}, GQ = {:.3f}, classification rate = {:.3f} ({:.1f}sec)"
              .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr, time.time() - time_start))

        # Remember the statistics of the epoch with the lowest validation cost.
        if val_cost < best_val:
            best_val = val_cost
            best_conf = val_conf
            best_cr = cr

        if epoch >= validation_window and early_stop2(val_window, best_val, validation_window):
            break

    letters = ['a', 'b', 'c', 'd', 'e', 'f', 'g',
               'h', 'i', 'j', 'k', 'l', 'm', 'n',
               'o', 'p', 'q', 'r', 's', 't', 'u',
               'v', 'w', 'x', 'y', 'z']

    print('Best Model')
    print('classification rate: {}, validation loss: {}'.format(best_cr, best_val))
    print('confusion matrix: ')
    plot_confusion_matrix(best_conf, letters, fmt='latex')
    plot_validation_cost(cost_train, cost_val, class_rate)

コード例 #20

ファイルを表示

ファイル: trimodal_with_val.py プロジェクト: behtak/ip-avsr

def main():
    """Train and evaluate the three-stream (image, diff image, DCT) model.

    Steps, in order:

    1. Read the config file named by the ``config`` CLI option and load the
       image and DCT ``.mat`` files (pre-split into train/val/test).
    2. Load the two finetuned encoders from pickle files.
    3. Build the ``adenet_v3`` network and compile the Theano training /
       cost / prediction functions (Adadelta updates with a shared,
       decaying learning rate).
    4. Train for up to ``NUM_EPOCHS`` epochs; whenever the validation cost
       improves, the test set is evaluated as well.  Training stops early
       when the validation window only contains rising costs.
    5. Print the final results, plot the confusion matrix and cost curves
       and, if the ``write_results`` CLI option is set, append a CSV line
       to that file.

    Raises:
        ValueError: if ``load_finetune`` or ``load_finetune_diff`` is
            disabled in the config, because the model needs both encoders.
    """
    configure_theano()
    options = parse_options()
    config_file = options['config']
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    print('CLI options: {}'.format(options.items()))

    print('Reading Config File: {}...'.format(config_file))
    print(config.items('data'))
    print(config.items('models'))
    print(config.items('training'))

    print('preprocessing dataset...')
    data = load_mat_file(config.get('data', 'images'))
    dct_data = load_mat_file(config.get('data', 'dct'))
    ae_finetuned = config.get('models', 'finetuned')
    ae_finetuned_diff = config.get('models', 'finetuned_diff')
    fusiontype = config.get('models', 'fusiontype')
    learning_rate = float(config.get('training', 'learning_rate'))
    decay_rate = float(config.get('training', 'decay_rate'))
    decay_start = int(config.get('training', 'decay_start'))
    load_finetune = config.getboolean('training', 'load_finetune')
    load_finetune_diff = config.getboolean('training', 'load_finetune_diff')

    train_vidlens = data['trVideoLengthVec'].astype('int').reshape((-1,))
    val_vidlens = data['valVideoLengthVec'].astype('int').reshape((-1,))
    test_vidlens = data['testVideoLengthVec'].astype('int').reshape((-1,))
    train_X = data['trData'].astype('float32')
    val_X = data['valData'].astype('float32')
    test_X = data['testData'].astype('float32')
    train_dct = dct_data['trDctFeatures'].astype('float32')
    val_dct = dct_data['valDctFeatures'].astype('float32')
    test_dct = dct_data['testDctFeatures'].astype('float32')
    train_X_diff = compute_diff_images(train_X, train_vidlens)
    val_X_diff = compute_diff_images(val_X, val_vidlens)
    test_X_diff = compute_diff_images(test_X, test_vidlens)
    train_y = data['trTargetsVec'].astype('int').reshape((-1,)) + 1  # +1 to handle the -1 introduced in lstm_gendata
    val_y = data['valTargetsVec'].astype('int').reshape((-1,)) + 1
    test_y = data['testTargetsVec'].astype('int').reshape((-1,)) + 1

    # featurewise normalize dct features; val/test reuse the statistics
    # returned for the training features
    train_dct, dct_mean, dct_std = featurewise_normalize_sequence(train_dct)
    val_dct = (val_dct - dct_mean) / dct_std
    test_dct = (test_dct - dct_mean) / dct_std

    # Bound up front so a disabled load flag is reported clearly below
    # instead of surfacing as a NameError at model construction.
    ae = None
    ae_diff = None

    # SECURITY NOTE: pickle.load can execute arbitrary code; only point the
    # 'finetuned' / 'finetuned_diff' config entries at trusted files.
    if load_finetune:
        print('loading finetuned encoder: {}...'.format(ae_finetuned))
        with open(ae_finetuned, 'rb') as encoder_file:
            ae = pickle.load(encoder_file)
        ae.initialize()

    if load_finetune_diff:
        print('loading finetuned encoder: {}...'.format(ae_finetuned_diff))
        with open(ae_finetuned_diff, 'rb') as encoder_file:
            ae_diff = pickle.load(encoder_file)
        ae_diff.initialize()

    if ae is None or ae_diff is None:
        raise ValueError(
            "both 'load_finetune' and 'load_finetune_diff' must be enabled in "
            "the [training] section: the model is built from both encoders")

    # IMPT: the encoder was trained with Fortran ordered images, so to visualize
    # convert all the images to C order using reshape_images_order()
    # output = dbn.predict(test_X)
    # test_X = reshape_images_order(test_X, (26, 44))
    # output = reshape_images_order(output, (26, 44))
    # visualize_reconstruction(test_X[:36, :], output[:36, :], shape=(26, 44))

    window = T.iscalar('theta')
    dct = T.tensor3('dct', dtype='float32')
    inputs = T.tensor3('inputs', dtype='float32')
    inputs_diff = T.tensor3('inputs_diff', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.ivector('targets')
    # The learning rate lives in a shared variable so it can be decayed
    # between epochs without recompiling the training function.
    lr = theano.shared(np.array(learning_rate, dtype=theano.config.floatX), name='learning_rate')
    lr_decay = np.array(decay_rate, dtype=theano.config.floatX)

    print('constructing end to end model...')
    network, l_fuse = adenet_v3.create_model(ae, ae_diff, (None, None, 1500), inputs,
                                             (None, None), mask,
                                             (None, None, 90), dct,
                                             (None, None, 1500), inputs_diff,
                                             250, window, 10, fusiontype)

    print_network(network)
    # draw_to_file(las.layers.get_all_layers(network), 'network.png')
    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = T.mean(las.objectives.categorical_crossentropy(predictions, targets))
    updates = adadelta(cost, all_params, learning_rate=lr)
    # updates = adagrad(cost, all_params, learning_rate=lr)

    train = theano.function(
        [inputs, targets, mask, dct, inputs_diff, window],
        cost, updates=updates, allow_input_downcast=True)
    compute_train_cost = theano.function([inputs, targets, mask, dct, inputs_diff, window],
                                         cost, allow_input_downcast=True)

    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = T.mean(las.objectives.categorical_crossentropy(test_predictions, targets))
    compute_test_cost = theano.function(
        [inputs, targets, mask, dct, inputs_diff, window], test_cost, allow_input_downcast=True)

    val_fn = theano.function([inputs, mask, dct, inputs_diff, window], test_predictions, allow_input_downcast=True)

    # Train for up to NUM_EPOCHS epochs of EPOCH_SIZE minibatches each.
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    NUM_EPOCHS = 30
    EPOCH_SIZE = 45
    BATCH_SIZE = 20
    WINDOW_SIZE = 9
    STRIP_SIZE = 3
    VALIDATION_WINDOW = 4
    val_window = circular_list(VALIDATION_WINDOW)
    train_strip = np.zeros((STRIP_SIZE,))
    best_val = float('inf')
    best_conf = None
    best_cr = 0.0

    datagen = gen_lstm_batch_random(train_X, train_y, train_vidlens, batchsize=BATCH_SIZE)
    integral_lens = compute_integral_len(train_vidlens)

    val_datagen = gen_lstm_batch_random(val_X, val_y, val_vidlens, batchsize=len(val_vidlens))
    test_datagen = gen_lstm_batch_random(test_X, test_y, test_vidlens, batchsize=len(test_vidlens))

    # We'll use this "validation set" to periodically check progress
    X_val, y_val, mask_val, idxs_val = next(val_datagen)
    integral_lens_val = compute_integral_len(val_vidlens)
    dct_val = gen_seq_batch_from_idx(val_dct, idxs_val, val_vidlens, integral_lens_val, np.max(val_vidlens))
    X_diff_val = gen_seq_batch_from_idx(val_X_diff, idxs_val, val_vidlens, integral_lens_val, np.max(val_vidlens))

    # we use the test set to check final classification rate
    X_test, y_test, mask_test, idxs_test = next(test_datagen)
    integral_lens_test = compute_integral_len(test_vidlens)
    dct_test = gen_seq_batch_from_idx(test_dct, idxs_test, test_vidlens, integral_lens_test, np.max(test_vidlens))
    X_diff_test = gen_seq_batch_from_idx(test_X_diff, idxs_test, test_vidlens, integral_lens_test, np.max(test_vidlens))

    def early_stop(cost_window):
        """Return True when every cost in the window exceeds its predecessor.

        A window with fewer than two entries never triggers a stop.  The
        comparison follows the iteration order of ``cost_window``
        (presumably oldest to newest -- verify against ``circular_list``).
        """
        if len(cost_window) < 2:
            return False
        else:
            curr = cost_window[0]
            for idx, cost in enumerate(cost_window):
                if curr < cost or idx == 0:
                    curr = cost
                else:
                    return False
            return True

    for epoch in range(NUM_EPOCHS):
        time_start = time.time()
        for i in range(EPOCH_SIZE):
            X, y, m, batch_idxs = next(datagen)
            d = gen_seq_batch_from_idx(train_dct, batch_idxs,
                                       train_vidlens, integral_lens, np.max(train_vidlens))
            X_diff = gen_seq_batch_from_idx(train_X_diff, batch_idxs,
                                            train_vidlens, integral_lens, np.max(train_vidlens))
            print_str = 'Epoch {} batch {}/{}: {} examples at learning rate = {:.4f}'.format(
                epoch + 1, i + 1, EPOCH_SIZE, len(X), float(lr.get_value()))
            print(print_str, end='')
            sys.stdout.flush()
            train(X, y, m, d, X_diff, WINDOW_SIZE)
            print('\r', end='')
        # The training cost is measured on the last minibatch of the epoch.
        cost = compute_train_cost(X, y, m, d, X_diff, WINDOW_SIZE)
        val_cost = compute_test_cost(X_val, y_val, mask_val, dct_val, X_diff_val, WINDOW_SIZE)
        cost_train.append(cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = cost
        val_window.push(val_cost)

        # gl: how far (in %) the latest validation cost is above the best one.
        # pk: how much the recent training strip exceeds its own minimum.
        # NOTE: train_strip starts as zeros, so until STRIP_SIZE epochs have
        # run its minimum is 0 and pk divides by zero (NumPy is expected to
        # give inf/nan with a warning rather than raise).
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        pk = 1000 * (np.sum(train_strip) / (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model(X_val, y_val, mask_val, dct_val, X_diff_val, WINDOW_SIZE, val_fn)
        class_rate.append(cr)

        # best_val starts at +inf, so the first epoch with a comparable
        # validation cost takes this branch and binds test_cr / test_conf
        # (and adascale_param for 'adasum'), which the final report needs.
        if val_cost < best_val:
            best_val = val_cost
            best_cr = cr
            if fusiontype == 'adasum':
                adascale_param = las.layers.get_all_param_values(l_fuse, scaling_param=True)
            test_cr, test_conf = evaluate_model(X_test, y_test, mask_test,
                                                dct_test, X_diff_test, WINDOW_SIZE, val_fn)
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f}, Test CR= {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr, test_cr, time.time() - time_start))
        else:
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr, time.time() - time_start))

        if epoch >= VALIDATION_WINDOW and early_stop(val_window):
            break

        # learning rate decay
        if epoch + 1 >= decay_start:
            lr.set_value(lr.get_value() * lr_decay)

    numbers = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']

    print('Final Model')
    print('CR: {}, val loss: {}, Test CR: {}'.format(best_cr, best_val, test_cr))
    if fusiontype == 'adasum':
        print("final scaling params: {}".format(adascale_param))
    print('confusion matrix: ')
    plot_confusion_matrix(test_conf, numbers, fmt='latex')
    plot_validation_cost(cost_train, cost_val, savefilename='valid_cost')

    # 'write_results' is optional: .get() avoids a KeyError when the option
    # was not supplied, and an empty/None value still skips the write.
    results_file = options.get('write_results')
    if results_file:
        with open(results_file, mode='a') as f:
            f.write('{},{},{}\n'.format(fusiontype, test_cr, best_val))

コード例 #21

ファイルを表示

ファイル: unimodal_with_val.py プロジェクト: lzuwei/ip-avsr

def main():
    """Train and evaluate the delta-net majority-vote model on image data.

    Steps, in order:

    1. Read the config file named by the ``config`` CLI option and load the
       image ``.mat`` file.
    2. Split the data by subject id into train / validation / test
       partitions (ids read from ``data/train.txt``, ``data/val.txt`` and
       ``data/test.txt``) and normalize each partition.
    3. Build ``deltanet_majority_vote`` on top of a pretrained encoder and
       compile the Theano training / cost / prediction functions (Adam).
    4. Train for up to ``no_epochs`` epochs; whenever the validation cost
       improves, the test set is evaluated as well.  Training stops early
       when ``early_stop2`` says so.
    5. Print the test result of the best epoch and plot the confusion
       matrix and the cost curves.

    Raises:
        RuntimeError: if no epoch improved on the initial validation loss
            (for example ``no_epochs == 0``), so there is nothing to report.
    """
    configure_theano()
    options = parse_options()
    config_file = options['config']
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    print('CLI options: {}'.format(options.items()))

    print(config.items('data'))
    print(config.items('models'))
    print(config.items('training'))

    print('preprocessing dataset...')
    data = load_mat_file(config.get('data', 'images'))

    ae_pretrained = config.get('models', 'pretrained')
    lstm_units = int(config.get('models', 'lstm_units'))
    output_classes = int(config.get('models', 'output_classes'))
    weight_init = config.get('models', 'weight_init')
    delta_window = config.getint('models', 'delta_window')
    nonlinearity = select_nonlinearity(config.get('models', 'nonlinearity'))

    # Glorot-uniform is the default; an unrecognized 'weight_init' value
    # silently keeps it.
    weight_init_fn = las.init.GlorotUniform()
    if weight_init == 'glorot':
        weight_init_fn = las.init.GlorotUniform()
    if weight_init == 'norm':
        weight_init_fn = las.init.Normal(0.1)
    if weight_init == 'uniform':
        weight_init_fn = las.init.Uniform()
    if weight_init == 'ortho':
        weight_init_fn = las.init.Orthogonal()

    learning_rate = float(config.get('training', 'learning_rate'))
    no_epochs = config.getint('training', 'no_epochs')
    use_peepholes = config.getboolean('training', 'use_peepholes')
    epochsize = config.getint('training', 'epochsize')
    batchsize = config.getint('training', 'batchsize')
    validation_window = config.getint('training', 'validation_window')

    # 53 subjects, 70 utterances, 5 view angles
    # s[x]_v[y]_u[z].mp4
    # resized, height, width = (26, 44)
    # ['dataMatrix', 'targetH', 'targetsPerVideoVec', 'videoLengthVec', '__header__', 'targetsVec',
    # '__globals__', 'iterVec', 'filenamesVec', 'dataMatrixCells', 'subjectsVec', 'targetW', '__version__']

    print(data.keys())
    X = data['dataMatrix'].astype('float32')  # .reshape((-1, 26, 44), order='f').reshape((-1, 26 * 44))
    y = data['targetsVec'].astype('int32')
    y = y.reshape((len(y),))
    uniques = np.unique(y)
    print('number of classifications: {}'.format(len(uniques)))
    subjects = data['subjectsVec'].astype('int')
    subjects = subjects.reshape((len(subjects),))
    video_lens = data['videoLengthVec'].astype('int')
    # Flatten to 1-D; the shape is now a proper 1-tuple like the reshapes
    # above (the original parenthesization passed a bare int by accident).
    video_lens = video_lens.reshape((len(video_lens),))

    train_subject_ids = read_data_split_file('data/train.txt')
    val_subject_ids = read_data_split_file('data/val.txt')
    test_subject_ids = read_data_split_file('data/test.txt')
    print('Train: {}'.format(train_subject_ids))
    print('Validation: {}'.format(val_subject_ids))
    print('Test: {}'.format(test_subject_ids))
    train_X, train_y, train_vidlens, train_subjects, \
    val_X, val_y, val_vidlens, val_subjects, \
    test_X, test_y, test_vidlens, test_subjects = \
        split_data(X, y, subjects, video_lens, train_subject_ids, val_subject_ids, test_subject_ids)

    # The three partitions together must account for every row of each array.
    assert train_X.shape[0] + val_X.shape[0] + test_X.shape[0] == len(X)
    assert train_y.shape[0] + val_y.shape[0] + test_y.shape[0] == len(y)
    assert train_vidlens.shape[0] + val_vidlens.shape[0] + test_vidlens.shape[0] == len(video_lens)
    assert train_subjects.shape[0] + val_subjects.shape[0] + test_subjects.shape[0] == len(subjects)

    # Each partition is normalized independently.
    train_X = normalize_input(train_X, centralize=True)
    val_X = normalize_input(val_X, centralize=True)
    test_X = normalize_input(test_X, centralize=True)

    weights, biases = load_dbn(ae_pretrained)

    # IMPT: the encoder was trained with Fortran ordered images, so to visualize
    # convert all the images to C order using reshape_images_order()
    # output = dbn.predict(test_X)
    # test_X = reshape_images_order(test_X, (26, 44))
    # output = reshape_images_order(output, (26, 44))
    # visualize_reconstruction(test_X[:36, :], output[:36, :], shape=(26, 44))

    window = T.iscalar('theta')
    inputs = T.tensor3('inputs', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    # Targets are a matrix here: the label is repeated along the second
    # axis to match the mask width (see the reshape/repeat calls below).
    targets = T.imatrix('targets')

    print('constructing end to end model...')
    network = deltanet_majority_vote.create_model_using_pretrained_encoder(weights, biases, (None, None, 1144), inputs,
                                                                           (None, None), mask, lstm_units,
                                                                           window, output_classes,
                                                                           weight_init_fn, use_peepholes, nonlinearity)

    print_network(network)
    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = temporal_softmax_loss(predictions, targets, mask)
    updates = adam(cost, all_params, learning_rate)

    train = theano.function(
        [inputs, targets, mask, window],
        cost, updates=updates, allow_input_downcast=True)
    compute_train_cost = theano.function([inputs, targets, mask, window], cost, allow_input_downcast=True)

    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = temporal_softmax_loss(test_predictions, targets, mask)
    compute_test_cost = theano.function(
        [inputs, targets, mask, window], test_cost, allow_input_downcast=True)

    val_fn = theano.function([inputs, mask, window], test_predictions, allow_input_downcast=True)

    # Train for up to `no_epochs` epochs of `epochsize` minibatches each.
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    STRIP_SIZE = 3
    val_window = circular_list(validation_window)
    train_strip = np.zeros((STRIP_SIZE,))
    best_val = float('inf')
    # Bound up front so that a run in which no epoch improves on best_val
    # (e.g. no_epochs == 0) fails with a clear message below instead of a
    # NameError in the final report.
    test_cr = None
    test_conf = None

    datagen = gen_lstm_batch_random(train_X, train_y, train_vidlens, batchsize=batchsize)
    val_datagen = gen_lstm_batch_random(val_X, val_y, val_vidlens, batchsize=len(val_vidlens))
    test_datagen = gen_lstm_batch_random(test_X, test_y, test_vidlens, batchsize=len(test_vidlens))

    # We'll use this "validation set" to periodically check progress
    X_val, y_val, mask_val, _ = next(val_datagen)

    # Use this test set to check final classification performance
    X_test, y_test, mask_test, _ = next(test_datagen)

    # reshape the targets for validation: the cost function gets labels
    # repeated to the mask width, while evaluate_model2 gets the original
    # per-video labels
    y_val_evaluate = y_val
    y_val = y_val.reshape((-1, 1)).repeat(mask_val.shape[-1], axis=-1)

    for epoch in range(no_epochs):
        time_start = time.time()
        for i in range(epochsize):
            X, y, m, _ = next(datagen)
            # repeat targets based on max sequence len
            y = y.reshape((-1, 1))
            y = y.repeat(m.shape[-1], axis=-1)
            print_str = 'Epoch {} batch {}/{}: {} examples using adam at learning rate = {:.4f}'.format(
                epoch + 1, i + 1, epochsize, len(X), learning_rate)
            print(print_str, end='')
            sys.stdout.flush()
            train(X, y, m, delta_window)
            print('\r', end='')
        # The training cost is measured on the last minibatch of the epoch.
        cost = compute_train_cost(X, y, m, delta_window)
        val_cost = compute_test_cost(X_val, y_val, mask_val, delta_window)
        cost_train.append(cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = cost
        val_window.push(val_cost)

        # gl: how far (in %) the latest validation cost is above the best one.
        # pk: how much the recent training strip exceeds its own minimum.
        # NOTE: train_strip starts as zeros, so until STRIP_SIZE epochs have
        # run its minimum is 0 and pk divides by zero (NumPy is expected to
        # give inf/nan with a warning rather than raise).
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        pk = 1000 * (np.sum(train_strip) / (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model2(X_val, y_val_evaluate, mask_val, delta_window, val_fn)
        class_rate.append(cr)

        # The test set is only evaluated on epochs that improve the best
        # validation cost; the last such evaluation is what gets reported.
        if val_cost < best_val:
            best_val = val_cost
            test_cr, test_conf = evaluate_model2(X_test, y_test, mask_test, delta_window, val_fn)
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f}, Test CR= {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr, test_cr, time.time() - time_start))
        else:
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr, time.time() - time_start))

        if epoch >= validation_window and early_stop2(val_window, best_val, validation_window):
            break

    if test_conf is None:
        raise RuntimeError('no epoch improved on the validation loss; '
                           'there is no test result to report')

    phrases = ['p1', 'p2', 'p3', 'p4', 'p5', 'p6', 'p7', 'p8', 'p9', 'p10']

    print('Final Model')
    print('classification rate: {}, validation loss: {}'.format(test_cr, best_val))
    print('confusion matrix: ')
    plot_confusion_matrix(test_conf, phrases, fmt='grid')
    plot_validation_cost(cost_train, cost_val, class_rate)

コード例 #22

ファイルを表示

ファイル: bimodal_diff_with_val.py プロジェクト: lzuwei/ip-avsr

def main():
    """Train and evaluate the image + diff-image LSTM classifier with a validation split.

    Steps, in order:

    1. Read the config file named by the ``config`` CLI option; a few settings
       (``use_peepholes``, ``validation_window``, ``num_epoch``, ``weight_init``)
       may be overridden from the CLI options.
    2. Load the image and DCT ``.mat`` files, compute diff images, and split
       everything into train / validation / test sets by the subject ids listed
       in ``data/{train,val,test}_30_10_12.txt``.
    3. Load the two pretrained encoders and build ``adenet_v2_2`` (when
       ``use_blstm`` is set) or ``adenet_v2_4``.
    4. Train with adam, tracking validation cost each epoch and stopping early
       through ``early_stop2``.
    5. Print the final figures, plot the confusion matrix and cost curves, and
       optionally append the run to the file named by ``write_results``.

    NOTE: the DCT features are loaded, mean-removed and split, but they are not
    fed to the network built here.
    """
    configure_theano()
    options = parse_options()
    config_file = options['config']
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    print('CLI options: {}'.format(options.items()))

    print('Reading Config File: {}...'.format(config_file))
    print(config.items('data'))
    print(config.items('models'))
    print(config.items('training'))

    print('preprocessing dataset...')
    data = load_mat_file(config.get('data', 'images'))
    dct_data = load_mat_file(config.get('data', 'dct'))
    ae_pretrained = config.get('models', 'pretrained')
    ae_pretrained_diff = config.get('models', 'pretrained_diff')
    fusiontype = config.get('models', 'fusiontype')
    lstm_size = config.getint('models', 'lstm_size')
    output_classes = config.getint('models', 'output_classes')
    use_peepholes = options['use_peepholes'] if 'use_peepholes' in options \
        else config.getboolean('models', 'use_peepholes')
    use_blstm = config.getboolean('models', 'use_blstm')
    delta_window = config.getint('models', 'delta_window')
    input_dimensions = config.getint('models', 'input_dimensions')

    # capture training parameters (CLI options win over the config file)
    validation_window = int(options['validation_window']) \
        if 'validation_window' in options else config.getint('training', 'validation_window')
    num_epoch = int(options['num_epoch']) if 'num_epoch' in options else config.getint('training', 'num_epoch')
    weight_init = options['weight_init'] if 'weight_init' in options else config.get('training', 'weight_init')
    use_finetuning = config.getboolean('training', 'use_finetuning')
    learning_rate = config.getfloat('training', 'learning_rate')
    batchsize = config.getint('training', 'batchsize')
    epochsize = config.getint('training', 'epochsize')

    # Glorot-uniform is the fallback when weight_init names no known scheme.
    weight_init_fn = las.init.GlorotUniform()
    if weight_init == 'glorot':
        weight_init_fn = las.init.GlorotUniform()
    if weight_init == 'norm':
        weight_init_fn = las.init.Normal(0.1)
    if weight_init == 'uniform':
        weight_init_fn = las.init.Uniform()
    if weight_init == 'ortho':
        weight_init_fn = las.init.Orthogonal()

    # 53 subjects, 70 utterances, 5 view angles
    # s[x]_v[y]_u[z].mp4
    # resized, height, width = (26, 44)
    # ['dataMatrix', 'targetH', 'targetsPerVideoVec', 'videoLengthVec', '__header__', 'targetsVec',
    # '__globals__', 'iterVec', 'filenamesVec', 'dataMatrixCells', 'subjectsVec', 'targetW', '__version__']

    print(data.keys())
    X = data['dataMatrix'].astype('float32')
    y = data['targetsVec'].astype('int32')
    y = y.reshape((len(y),))
    dct_feats = dct_data['dctFeatures'].astype('float32')
    uniques = np.unique(y)
    print('number of classifications: {}'.format(len(uniques)))
    subjects = data['subjectsVec'].astype('int')
    subjects = subjects.reshape((len(subjects),))
    video_lens = data['videoLengthVec'].astype('int')
    video_lens = video_lens.reshape((len(video_lens),))

    # X = reorder_data(X, (26, 44), 'f', 'c')
    # print('performing sequencewise mean image removal...')
    # X = sequencewise_mean_image_subtraction(X, video_lens)
    # visualize_images(X[550:650], (26, 44))
    X_diff = compute_diff_images(X, video_lens)

    # mean remove dct features
    dct_feats = sequencewise_mean_image_subtraction(dct_feats, video_lens)

    train_subject_ids = read_data_split_file('data/train_30_10_12.txt')
    val_subject_ids = read_data_split_file('data/val_30_10_12.txt')
    test_subject_ids = read_data_split_file('data/test_30_10_12.txt')
    print('Train: {}'.format(train_subject_ids))
    print('Validation: {}'.format(val_subject_ids))
    print('Test: {}'.format(test_subject_ids))
    train_X, train_y, train_dct, train_X_diff, train_vidlens, train_subjects, \
    val_X, val_y, val_dct, val_X_diff, val_vidlens, val_subjects, \
    test_X, test_y, test_dct, test_X_diff, test_vidlens, test_subjects = \
        split_data(X, y, dct_feats, X_diff, subjects, video_lens, train_subject_ids, val_subject_ids, test_subject_ids)

    # Sanity checks: the three splits together must account for every sample.
    assert train_X.shape[0] + val_X.shape[0] + test_X.shape[0] == len(X)
    assert train_y.shape[0] + val_y.shape[0] + test_y.shape[0] == len(y)
    assert train_vidlens.shape[0] + val_vidlens.shape[0] + test_vidlens.shape[0] == len(video_lens)
    # Compare subjects against subjects (the middle term previously used val_vidlens).
    assert train_subjects.shape[0] + val_subjects.shape[0] + test_subjects.shape[0] == len(subjects)

    train_X = normalize_input(train_X, centralize=True)
    val_X = normalize_input(val_X, centralize=True)
    test_X = normalize_input(test_X, centralize=True)

    # shift the targets down by one (presumably 1-based labels in the .mat file -- TODO confirm)
    train_y -= 1
    val_y -= 1
    test_y -= 1

    print('loading pretrained encoder: {}...'.format(ae_pretrained))
    ae = load_dbn(ae_pretrained)

    print('loading pretrained encoder: {}...'.format(ae_pretrained_diff))
    ae_diff = load_dbn(ae_pretrained_diff)

    # IMPT: the encoder was trained with fortran ordered images, so to visualize
    # convert all the images to C order using reshape_images_order()
    # output = dbn.predict(test_X)
    # test_X = reshape_images_order(test_X, (26, 44))
    # output = reshape_images_order(output, (26, 44))
    # visualize_reconstruction(test_X[:36, :], output[:36, :], shape=(26, 44))

    window = T.iscalar('theta')
    inputs = T.tensor3('inputs', dtype='float32')
    inputs_diff = T.tensor3('inputs_diff', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.imatrix('targets')

    print('constructing end to end model...')
    if use_blstm:
        network, l_fuse = adenet_v2_2.create_model(ae, ae_diff, (None, None, input_dimensions), inputs,
                                                   (None, None), mask,
                                                   (None, None, input_dimensions), inputs_diff,
                                                   lstm_size, window, output_classes, fusiontype,
                                                   weight_init_fn, use_peepholes)
    else:
        network, l_fuse = adenet_v2_4.create_model(ae, ae_diff, (None, None, input_dimensions), inputs,
                                                   (None, None), mask,
                                                   (None, None, input_dimensions), inputs_diff,
                                                   lstm_size, window, output_classes, fusiontype,
                                                   weight_init_fn, use_peepholes)

    print_network(network)
    # draw_to_file(las.layers.get_all_layers(network), 'network.png')
    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = temporal_softmax_loss(predictions, targets, mask)
    updates = adam(cost, all_params, learning_rate=learning_rate)

    train = theano.function(
        [inputs, targets, mask, inputs_diff, window],
        cost, updates=updates, allow_input_downcast=True)
    compute_train_cost = theano.function([inputs, targets, mask, inputs_diff, window],
                                         cost, allow_input_downcast=True)

    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = temporal_softmax_loss(test_predictions, targets, mask)
    compute_test_cost = theano.function(
        [inputs, targets, mask, inputs_diff, window], test_cost, allow_input_downcast=True)

    val_fn = theano.function([inputs, mask, inputs_diff, window], test_predictions, allow_input_downcast=True)

    # Train for at most num_epoch epochs of epochsize minibatches each.
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    STRIP_SIZE = 3
    val_window = circular_list(validation_window)
    train_strip = np.zeros((STRIP_SIZE,))
    best_val = float('inf')
    best_cr = 0.0

    datagen = gen_lstm_batch_random(train_X, train_y, train_vidlens, batchsize=batchsize)
    integral_lens = compute_integral_len(train_vidlens)

    # One batch covering the whole validation / test set each.
    val_datagen = gen_lstm_batch_random(val_X, val_y, val_vidlens, batchsize=len(val_vidlens))
    test_datagen = gen_lstm_batch_random(test_X, test_y, test_vidlens, batchsize=len(test_vidlens))

    # We'll use this "validation set" to periodically check progress
    X_val, y_val, mask_val, idxs_val = next(val_datagen)
    integral_lens_val = compute_integral_len(val_vidlens)
    X_diff_val = gen_seq_batch_from_idx(val_X_diff, idxs_val, val_vidlens, integral_lens_val, np.max(val_vidlens))

    # we use the test set to check final classification rate
    X_test, y_test, mask_test, idxs_test = next(test_datagen)
    integral_lens_test = compute_integral_len(test_vidlens)
    X_diff_test = gen_seq_batch_from_idx(test_X_diff, idxs_test, test_vidlens, integral_lens_test, np.max(test_vidlens))

    # Keep the per-sequence targets for evaluation; the cost function gets the
    # targets repeated along the time axis of the mask.
    y_val_evaluate = y_val
    y_val = y_val.reshape((-1, 1)).repeat(mask_val.shape[-1], axis=-1)

    for epoch in range(num_epoch):
        time_start = time.time()
        for i in range(epochsize):
            X, y, m, batch_idxs = next(datagen)
            # repeat targets based on max sequence len
            y = y.reshape((-1, 1))
            y = y.repeat(m.shape[-1], axis=-1)
            X_diff = gen_seq_batch_from_idx(train_X_diff, batch_idxs,
                                            train_vidlens, integral_lens, np.max(train_vidlens))
            print_str = 'Epoch {} batch {}/{}: {} examples using adam'.format(epoch + 1, i + 1, epochsize, len(X))
            print(print_str, end='')
            sys.stdout.flush()
            train(X, y, m, X_diff, delta_window)
            print('\r', end='')
        # The train cost is measured on the last minibatch of the epoch only.
        cost = compute_train_cost(X, y, m, X_diff, delta_window)
        val_cost = compute_test_cost(X_val, y_val, mask_val, X_diff_val, delta_window)
        cost_train.append(cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = cost
        val_window.push(val_cost)

        # gl: validation cost relative to the best seen so far (percent);
        # pk: training progress over the last STRIP_SIZE epochs; pq: their ratio.
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        pk = 1000 * (np.sum(train_strip) / (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model2(X_val, y_val_evaluate, mask_val, X_diff_val, delta_window, val_fn)
        class_rate.append(cr)

        if val_cost < best_val:
            # New best validation cost: snapshot the figures and score the test set.
            best_val = val_cost
            best_cr = cr
            if fusiontype == 'adasum':
                adascale_param = las.layers.get_all_param_values(l_fuse, scaling_param=True)
            test_cr, test_conf = evaluate_model2(X_test, y_test, mask_test,
                                                 X_diff_test, delta_window, val_fn)
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f}, Test CR= {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr, test_cr, time.time() - time_start))
        else:
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr, time.time() - time_start))

        if epoch >= validation_window and early_stop2(val_window, best_val, validation_window):
            break

    phrases = ['p1', 'p2', 'p3', 'p4', 'p5', 'p6', 'p7', 'p8', 'p9', 'p10']

    print('Final Model')
    print('CR: {}, val loss: {}, Test CR: {}'.format(best_cr, best_val, test_cr))
    if fusiontype == 'adasum':
        print("final scaling params: {}".format(adascale_param))
    print('confusion matrix: ')
    plot_confusion_matrix(test_conf, phrases, fmt='latex')
    plot_validation_cost(cost_train, cost_val, savefilename='valid_cost')

    if 'write_results' in options:
        # Append one record per run: settings, the three per-epoch curves, then the summary.
        results_file = options['write_results']
        with open(results_file, mode='a') as f:
            f.write('{},{},{},{},{}\n'.format(validation_window, weight_init, use_peepholes, use_blstm, use_finetuning))

            s = ','.join([str(v) for v in cost_train])
            f.write('{}\n'.format(s))

            s = ','.join([str(v) for v in cost_val])
            f.write('{}\n'.format(s))

            s = ','.join([str(v) for v in class_rate])
            f.write('{}\n'.format(s))

            f.write('{},{},{}\n'.format(fusiontype, best_cr, best_val))

コード例 #23

ファイルを表示

ファイル: leave_one_out.py プロジェクト: konatasick/ip-avsr

def main():
    """Run one leave-one-subject-out training/evaluation round.

    The subject given by the ``test_subj`` CLI option is held out from
    subjects 1..53; the rest form the training set. The held-out subject's
    data is also what the per-epoch "validation" cost, classification rate and
    early stopping are computed on.

    Settings come from ``config/leave_one_out.ini``. Depending on the
    ``training`` flags the encoder is finetuned on the training images, saved
    to and/or loaded from pickle files. An ``adenet_v5`` model taking image,
    DCT and diff-image inputs is then trained with adadelta and a decaying
    learning rate.

    When the ``results`` CLI option is present, one line
    ``<test subject>, <best classification rate>, <best validation cost>`` is
    appended to that file.
    """
    configure_theano()
    options = parse_options()
    config_file = 'config/leave_one_out.ini'
    print('loading config file: {}'.format(config_file))
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    print('preprocessing dataset...')
    data = load_mat_file(config.get('data', 'images'))
    dct_data = load_mat_file(config.get('data', 'dct'))
    ae_pretrained = config.get('models', 'pretrained')
    ae_finetuned = config.get('models', 'finetuned')
    ae_finetuned_diff = config.get('models', 'finetuned_diff')
    learning_rate = float(config.get('training', 'learning_rate'))
    decay_rate = float(config.get('training', 'decay_rate'))
    decay_start = int(config.get('training', 'decay_start'))
    do_finetune = config.getboolean('training', 'do_finetune')
    save_finetune = config.getboolean('training', 'save_finetune')
    load_finetune = config.getboolean('training', 'load_finetune')
    load_finetune_diff = config.getboolean('training', 'load_finetune_diff')

    # 53 subjects, 70 utterances, 5 view angles
    # s[x]_v[y]_u[z].mp4
    # resized, height, width = (26, 44)
    # ['dataMatrix', 'targetH', 'targetsPerVideoVec', 'videoLengthVec', '__header__', 'targetsVec',
    # '__globals__', 'iterVec', 'filenamesVec', 'dataMatrixCells', 'subjectsVec', 'targetW', '__version__']

    print(data.keys())
    X = data['dataMatrix'].astype('float32')
    y = data['targetsVec'].astype('int32')
    y = y.reshape((len(y), ))
    dct_feats = dct_data['dctFeatures'].astype('float32')
    uniques = np.unique(y)
    print('number of classifications: {}'.format(len(uniques)))
    subjects = data['subjectsVec'].astype('int')
    subjects = subjects.reshape((len(subjects), ))
    video_lens = data['videoLengthVec'].astype('int')
    video_lens = video_lens.reshape((len(video_lens), ))

    # X = reorder_data(X, (26, 44), 'f', 'c')
    # print('performing sequencewise mean image removal...')
    # X = sequencewise_mean_image_subtraction(X, video_lens)
    # visualize_images(X[550:650], (26, 44))
    X_diff = compute_diff_images(X, video_lens)

    # mean remove dct features
    dct_feats = sequencewise_mean_image_subtraction(dct_feats, video_lens)

    test_subject_ids = [options['test_subj']]
    # Materialise as a list: a lazy range object has no remove().
    train_subject_ids = list(range(1, 54))
    for subj in test_subject_ids:
        train_subject_ids.remove(subj)

    if 'results' in options:
        # Opened before training starts so that an unusable path fails
        # immediately; written and closed at the very end of the run.
        results_file = options['results']
        f = open(results_file, mode='a')

    print(train_subject_ids)
    print(test_subject_ids)
    train_X, train_y, train_dct, train_X_diff, train_vidlens, train_subjects, \
    test_X, test_y, test_dct, test_X_diff, test_vidlens, test_subjects = \
        split_data(X, y, dct_feats, X_diff, subjects, video_lens, train_subject_ids, test_subject_ids)

    # Sanity checks: the two splits together must account for every sample.
    assert train_X.shape[0] + test_X.shape[0] == len(X)
    assert train_y.shape[0] + test_y.shape[0] == len(y)
    assert train_vidlens.shape[0] + test_vidlens.shape[0] == len(video_lens)
    assert train_subjects.shape[0] + test_subjects.shape[0] == len(subjects)

    train_X = normalize_input(train_X, centralize=True)
    test_X = normalize_input(test_X, centralize=True)

    # featurewise normalize dct features (test set uses the training statistics)
    train_dct, dct_mean, dct_std = featurewise_normalize_sequence(train_dct)
    test_dct = (test_dct - dct_mean) / dct_std

    if do_finetune:
        print('performing finetuning on pretrained encoder: {}'.format(
            ae_pretrained))
        ae = load_dbn(ae_pretrained)
        ae.initialize()
        ae.fit(train_X, train_X)

    # NOTE: `ae` only exists if do_finetune or load_finetune is enabled, and
    # `ae_diff` only if load_finetune_diff is enabled; the config must make
    # sure both are set before the model is constructed below.
    if save_finetune:
        print('saving finetuned encoder: {}...'.format(ae_finetuned))
        with open(ae_finetuned, 'wb') as encoder_file:
            pickle.dump(ae, encoder_file)

    # SECURITY NOTE: unpickling executes arbitrary code from the file; only
    # point the `finetuned*` config entries at files you trust.
    if load_finetune:
        print('loading finetuned encoder: {}...'.format(ae_finetuned))
        with open(ae_finetuned, 'rb') as encoder_file:
            ae = pickle.load(encoder_file)
        ae.initialize()

    if load_finetune_diff:
        print('loading finetuned encoder: {}...'.format(ae_finetuned_diff))
        with open(ae_finetuned_diff, 'rb') as encoder_file:
            ae_diff = pickle.load(encoder_file)
        ae_diff.initialize()

    # IMPT: the encoder was trained with fortran ordered images, so to visualize
    # convert all the images to C order using reshape_images_order()
    # output = dbn.predict(test_X)
    # test_X = reshape_images_order(test_X, (26, 44))
    # output = reshape_images_order(output, (26, 44))
    # visualize_reconstruction(test_X[:36, :], output[:36, :], shape=(26, 44))

    window = T.iscalar('theta')
    dct = T.tensor3('dct', dtype='float32')
    inputs = T.tensor3('inputs', dtype='float32')
    inputs_diff = T.tensor3('inputs_diff', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.ivector('targets')
    # Shared variable so the learning rate can be decayed between epochs
    # without recompiling the training function.
    lr = theano.shared(np.array(learning_rate, dtype=theano.config.floatX),
                       name='learning_rate')
    lr_decay = np.array(decay_rate, dtype=theano.config.floatX)

    print('constructing end to end model...')
    # Earlier single-stream variant, kept for reference:
    # network = create_end_to_end_model(dbn, (None, None, 1144), inputs,
    #                                   (None, None), mask, 250, window)

    network = adenet_v5.create_model(ae, ae_diff, (None, None, 1144), inputs,
                                     (None, None), mask, (None, None, 90), dct,
                                     (None, None, 1144), inputs_diff, 250,
                                     window, 10)

    print_network(network)
    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = T.mean(las.objectives.categorical_crossentropy(
        predictions, targets))
    updates = adadelta(cost, all_params, learning_rate=lr)
    # updates = adagrad(cost, all_params, learning_rate=lr)

    # Optional max-norm constraint on the weights; disabled by default.
    use_max_constraint = False
    if use_max_constraint:
        MAX_NORM = 4
        for param in las.layers.get_all_params(network, regularizable=True):
            if param.ndim > 1:  # only apply to dimensions larger than 1, exclude biases
                updates[param] = norm_constraint(
                    param,
                    MAX_NORM *
                    las.utils.compute_norms(param.get_value()).mean())

    train = theano.function([inputs, targets, mask, dct, inputs_diff, window],
                            cost,
                            updates=updates,
                            allow_input_downcast=True)
    compute_train_cost = theano.function(
        [inputs, targets, mask, dct, inputs_diff, window],
        cost,
        allow_input_downcast=True)

    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = T.mean(
        las.objectives.categorical_crossentropy(test_predictions, targets))
    compute_test_cost = theano.function(
        [inputs, targets, mask, dct, inputs_diff, window],
        test_cost,
        allow_input_downcast=True)

    val_fn = theano.function([inputs, mask, dct, inputs_diff, window],
                             test_predictions,
                             allow_input_downcast=True)

    # Train for at most NUM_EPOCHS epochs of EPOCH_SIZE minibatches each.
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    NUM_EPOCHS = 10
    EPOCH_SIZE = 120
    BATCH_SIZE = 10
    WINDOW_SIZE = 9
    STRIP_SIZE = 3
    VALIDATION_WINDOW = 4
    val_window = circular_list(VALIDATION_WINDOW)
    train_strip = np.zeros((STRIP_SIZE, ))
    best_val = float('inf')
    best_conf = None
    best_cr = 0.0

    datagen = gen_lstm_batch_random(train_X,
                                    train_y,
                                    train_vidlens,
                                    batchsize=BATCH_SIZE)
    # The held-out subject serves as the validation set: one full-size batch.
    val_datagen = gen_lstm_batch_random(test_X,
                                        test_y,
                                        test_vidlens,
                                        batchsize=len(test_vidlens))
    integral_lens = compute_integral_len(train_vidlens)

    # We'll use this "validation set" to periodically check progress
    X_val, y_val, mask_val, idxs_val = next(val_datagen)
    integral_lens_val = compute_integral_len(test_vidlens)
    dct_val = gen_seq_batch_from_idx(test_dct, idxs_val, test_vidlens,
                                     integral_lens_val, np.max(test_vidlens))
    X_diff_val = gen_seq_batch_from_idx(test_X_diff, idxs_val,
                                        test_vidlens, integral_lens_val,
                                        np.max(test_vidlens))

    def early_stop(cost_window):
        """Return True when every cost in the window exceeds the one before it."""
        if len(cost_window) < 2:
            return False
        else:
            curr = cost_window[0]
            for idx, cost in enumerate(cost_window):
                if curr < cost or idx == 0:
                    curr = cost
                else:
                    return False
            return True

    for epoch in range(NUM_EPOCHS):
        time_start = time.time()
        for i in range(EPOCH_SIZE):
            X, y, m, batch_idxs = next(datagen)
            d = gen_seq_batch_from_idx(train_dct, batch_idxs, train_vidlens,
                                       integral_lens, np.max(train_vidlens))
            X_diff = gen_seq_batch_from_idx(train_X_diff, batch_idxs,
                                            train_vidlens, integral_lens,
                                            np.max(train_vidlens))
            print_str = 'Epoch {} batch {}/{}: {} examples at learning rate = {:.4f}'.format(
                epoch + 1, i + 1, EPOCH_SIZE, len(X), float(lr.get_value()))
            print(print_str, end='')
            sys.stdout.flush()
            train(X, y, m, d, X_diff, WINDOW_SIZE)
            print('\r', end='')
        # The train cost is measured on the last minibatch of the epoch only.
        cost = compute_train_cost(X, y, m, d, X_diff, WINDOW_SIZE)
        val_cost = compute_test_cost(X_val, y_val, mask_val, dct_val,
                                     X_diff_val, WINDOW_SIZE)
        cost_train.append(cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = cost
        val_window.push(val_cost)

        # gl: validation cost relative to the best seen so far (percent);
        # pk: training progress over the last STRIP_SIZE epochs; pq: their ratio.
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        pk = 1000 * (np.sum(train_strip) /
                     (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model(X_val, y_val, mask_val, dct_val,
                                      X_diff_val, WINDOW_SIZE, val_fn)
        class_rate.append(cr)

        print(
            "Epoch {} train cost = {}, validation cost = {}, "
            "generalization loss = {:.3f}, GQ = {:.3f}, classification rate = {:.3f} ({:.1f}sec)"
            .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                    time.time() - time_start))

        if val_cost < best_val:
            best_val = val_cost
            best_conf = val_conf
            best_cr = cr

        if epoch >= VALIDATION_WINDOW and early_stop(val_window):
            break

        # learning rate decay
        if epoch >= decay_start - 1:
            lr.set_value(lr.get_value() * lr_decay)

    phrases = ['p1', 'p2', 'p3', 'p4', 'p5', 'p6', 'p7', 'p8', 'p9', 'p10']

    print('Final Model')
    print('classification rate: {}, validation loss: {}'.format(
        best_cr, best_val))
    print('confusion matrix: ')
    plot_confusion_matrix(best_conf, phrases, fmt='grid')
    plot_validation_cost(cost_train,
                         cost_val,
                         class_rate,
                         savefilename='valid_cost')

    if 'results' in options:
        print('writing to results file: {}...'.format(options['results']))
        f.write('{}, {}, {}\n'.format(test_subject_ids[0], best_cr, best_val))
        f.close()

コード例 #24

ファイルを表示

ファイル: dbn.py プロジェクト: behtak/ip-avsr

def construct_lstm(input_size, lstm_size, output_size, train_data_gen, val_data_gen):
    """Build a single-layer masked LSTM classifier, train it, and report the best epoch.

    The network is: input -> LSTM (with mask) -> slice of the last time step ->
    softmax dense layer. It is trained with adadelta on categorical
    cross-entropy for up to 30 epochs of 26 minibatches, stopping early when
    the validation cost has risen across the whole validation window.

    Args:
        input_size: number of features per time step of the input sequences.
        lstm_size: number of hidden units in the LSTM layer.
        output_size: number of output classes.
        train_data_gen: iterator yielding 4-tuples ``(X, y, mask, extra)``;
            the fourth item is ignored.
        val_data_gen: iterator whose first item is a 3-tuple ``(X, y, mask)``
            used as the fixed validation batch.

    Returns:
        None. Results are printed and plotted.
    """
    # Gate settings. One Gate object is shared by the input, forget and output
    # gates; the cell gets its own with tanh and no cell (peephole) connection.
    gate_parameters = Gate(
        W_in=las.init.Orthogonal(),
        W_hid=las.init.Orthogonal(),
        b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=las.init.Orthogonal(),
        W_hid=las.init.Orthogonal(),
        W_cell=None,
        b=las.init.Constant(0.),
        nonlinearity=tanh)

    # Input layers. Batch size and sequence length are left open (None);
    # the mask layer has shape (n_batch, n_time_steps).
    input_layer = InputLayer(shape=(None, None, input_size), name='input')
    mask_layer = InputLayer(shape=(None, None), name='mask')

    hidden_units = lstm_size
    lstm_layer = LSTMLayer(
        input_layer, hidden_units,
        mask_input=mask_layer,
        ingate=gate_parameters,
        forgetgate=gate_parameters,
        cell=cell_parameters,
        outgate=gate_parameters,
        # learn the initial state and clip gradients
        learn_init=True,
        grad_clipping=5.,
        name='lstm1')

    '''
    # The "backwards" layer is the same as the first,
    # except that the backwards argument is set to True.
    l_lstm_back = LSTMLayer(
        l_in, N_HIDDEN, ingate=gate_parameters,
        mask_input=l_mask, forgetgate=gate_parameters,
        cell=cell_parameters, outgate=gate_parameters,
        learn_init=True, grad_clipping=5., backwards=True)
    # We'll combine the forward and backward layer output by summing.
    # Merge layers take in lists of layers to merge as input.
    l_sum = ElemwiseSumLayer([l_lstm, l_lstm_back])

    # implement drop-out regularization
    l_dropout = DropoutLayer(l_sum)

    l_lstm2 = LSTMLayer(
        l_dropout, N_HIDDEN,
        # We need to specify a separate input for masks
        mask_input=l_mask,
        # Here, we supply the gate parameters for each gate
        ingate=gate_parameters, forgetgate=gate_parameters,
        cell=cell_parameters, outgate=gate_parameters,
        # We'll learn the initialization and use gradient clipping
        learn_init=True, grad_clipping=5.)

    # The "backwards" layer is the same as the first,
    # except that the backwards argument is set to True.
    l_lstm_back2 = LSTMLayer(
        l_dropout, N_HIDDEN, ingate=gate_parameters,
        mask_input=l_mask, forgetgate=gate_parameters,
        cell=cell_parameters, outgate=gate_parameters,
        learn_init=True, grad_clipping=5., backwards=True)

    # We'll combine the forward and backward layer output by summing.
    # Merge layers take in lists of layers to merge as input.
    l_sum2 = ElemwiseSumLayer([l_lstm2, l_lstm_back2])
    '''
    # Keep only the last time step of the LSTM output, then classify it.
    last_step_layer = SliceLayer(lstm_layer, -1, 1, name='slice')
    output_layer = DenseLayer(
        last_step_layer,
        num_units=output_size,
        nonlinearity=las.nonlinearities.softmax,
        name='output')

    print_network(output_layer)
    # draw_to_file(las.layers.get_all_layers(l_out), 'network.png')

    # One integer target per sequence.
    target_values = T.ivector('target_output')

    # Symbolic mask variable; the compiled functions below use the mask
    # layer's own input variable instead.
    mask = T.matrix('mask')

    # Training graph: stochastic output, mean cross-entropy, adadelta updates.
    prediction = las.layers.get_output(output_layer)
    cost = T.mean(las.objectives.categorical_crossentropy(prediction, target_values))
    all_params = las.layers.get_all_params(output_layer, trainable=True)
    updates = adadelta(cost, all_params)

    input_var = input_layer.input_var
    mask_var = mask_layer.input_var
    supervised_inputs = (input_var, target_values, mask_var)

    train = theano.function(
        list(supervised_inputs), cost,
        updates=updates, allow_input_downcast=True)
    compute_train_cost = theano.function(
        list(supervised_inputs), cost, allow_input_downcast=True)

    # Evaluation graph: deterministic output.
    test_prediction = las.layers.get_output(output_layer, deterministic=True)
    test_cost = T.mean(las.objectives.categorical_crossentropy(test_prediction, target_values))
    compute_val_cost = theano.function(
        list(supervised_inputs), test_cost, allow_input_downcast=True)
    val_fn = theano.function(
        [input_var, mask_var], test_prediction, allow_input_downcast=True)

    # A single fixed batch is used for validation throughout training.
    X_val, y_val, mask_val = next(val_data_gen)

    # Per-epoch histories and best-so-far bookkeeping.
    cost_train = []
    cost_val = []
    class_rate = []
    best_val = float('inf')
    best_conf = None
    best_cr = 0.0
    NUM_EPOCHS = 30
    EPOCH_SIZE = 26
    STRIP_SIZE = 3
    MAX_LOSS = 0.05
    VALIDATION_WINDOW = 4
    val_window = circular_list(VALIDATION_WINDOW)
    train_strip = np.zeros((STRIP_SIZE,))

    def early_stop(cost_window):
        """Return True when every cost in the window exceeds the one before it."""
        if len(cost_window) < 2:
            return False
        previous = cost_window[0]
        for position, value in enumerate(cost_window):
            if position != 0 and not previous < value:
                return False
            previous = value
        return True

    for epoch in range(NUM_EPOCHS):
        time_start = time.time()
        for _batch in range(EPOCH_SIZE):
            batch_x, batch_y, batch_mask, _ = next(train_data_gen)
            train(batch_x, batch_y, batch_mask)

        # Train cost is measured on the last minibatch of the epoch only.
        train_cost = compute_train_cost(batch_x, batch_y, batch_mask)
        val_cost = compute_val_cost(X_val, y_val, mask_val)
        cr, conf = evaluate_model(X_val, y_val, mask_val, val_fn)

        cost_train.append(train_cost)
        cost_val.append(val_cost)
        class_rate.append(cr)
        train_strip[epoch % STRIP_SIZE] = train_cost
        val_window.push(val_cost)

        # Generalization loss, training progress over the strip, and their ratio.
        generalization_loss = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        progress = 1000 * (np.sum(train_strip) / (STRIP_SIZE * np.min(train_strip)) - 1)
        quotient = generalization_loss / progress

        elapsed = time.time() - time_start
        print("Epoch {} train cost = {}, validation cost = {}, "
              "generalization loss = {:.3f}, GQ = {:.3f}, classification rate = {:.3f} ({:.1f}sec)"
              .format(epoch + 1, cost_train[-1], cost_val[-1],
                      generalization_loss, quotient, cr, elapsed))

        if val_cost < best_val:
            best_val, best_cr, best_conf = val_cost, cr, conf

        if epoch >= VALIDATION_WINDOW and early_stop(val_window):
            break

    letters = list('abcdefghijklmnopqrstuvwxyz')

    print('Final Model')
    print('classification rate: {}'.format(best_cr))
    print('validation loss: {}'.format(best_val))
    print('confusion matrix: ')
    plot_confusion_matrix(best_conf, letters, fmt='grid')
    plot_validation_cost(cost_train, cost_val, class_rate)

コード例 #25

ファイルを表示

ファイル: 2stream_nodelta.py プロジェクト: lzuwei/ip-avsr

def main():
    """Train and evaluate the two-stream LSTM classifier described by a config file.

    Workflow, as implemented below:

    1. Read the CLI options and the config file; CLI values override the
       corresponding config entries where both are consulted.
    2. Load the ``.mat`` data of both streams, preprocess it and split it into
       train / validation / test sets by subject id.
    3. Build the fused network on top of the two models returned by
       ``load_decoder`` and compile the Theano train / cost / prediction
       functions.
    4. Train for at most ``num_epoch`` epochs, remembering the parameters of
       the epoch with the lowest validation cost, and stop early when
       ``early_stop2`` says so.
    5. Print the results and, depending on the ``save_plot``,
       ``write_results`` and ``save_best`` options, persist plots, a results
       line and the best parameters.

    Raises:
        RuntimeError: if no epoch ever produced a validation cost below the
            initial ``+inf`` (e.g. ``num_epoch`` is 0 or the cost is NaN), so
            there is no best model to report.
    """
    configure_theano()
    options = parse_options()
    config_file = options['config']
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    print('CLI options: {}'.format(options.items()))

    print('Reading Config File: {}...'.format(config_file))
    print(config.items('stream1'))
    print(config.items('stream2'))
    print(config.items('lstm_classifier'))
    print(config.items('training'))

    print('preprocessing dataset...')

    # stream 1
    s1_data = load_mat_file(config.get('stream1', 'data'))
    s1_imagesize = tuple([int(d) for d in config.get('stream1', 'imagesize').split(',')])
    s1 = config.get('stream1', 'model')
    s1_inputdim = config.getint('stream1', 'input_dimensions')
    s1_shape = config.get('stream1', 'shape')
    s1_nonlinearities = config.get('stream1', 'nonlinearities')

    # stream 2
    s2_data = load_mat_file(config.get('stream2', 'data'))
    s2_imagesize = tuple([int(d) for d in config.get('stream2', 'imagesize').split(',')])
    s2 = config.get('stream2', 'model')
    s2_inputdim = config.getint('stream2', 'input_dimensions')
    s2_shape = config.get('stream2', 'shape')
    s2_nonlinearities = config.get('stream2', 'nonlinearities')

    # lstm classifier
    fusiontype = config.get('lstm_classifier', 'fusiontype')
    weight_init = options['weight_init'] if 'weight_init' in options \
        else config.get('lstm_classifier', 'weight_init')
    use_peepholes = options['use_peepholes'] if 'use_peepholes' in options \
        else config.getboolean('lstm_classifier', 'use_peepholes')
    output_classes = config.getint('lstm_classifier', 'output_classes')
    output_classnames = config.get('lstm_classifier', 'output_classnames').split(',')
    lstm_size = config.getint('lstm_classifier', 'lstm_size')
    matlab_target_offset = config.getboolean('lstm_classifier', 'matlab_target_offset')

    # capture training parameters (CLI values are cast like the config getters do)
    validation_window = int(options['validation_window']) \
        if 'validation_window' in options else config.getint('training', 'validation_window')
    num_epoch = int(options['num_epoch']) if 'num_epoch' in options else config.getint('training', 'num_epoch')
    # Cast the CLI override so the optimizer receives a float, exactly as it
    # does when the value comes from config.getfloat().
    learning_rate = float(options['learning_rate']) if 'learning_rate' in options \
        else config.getfloat('training', 'learning_rate')
    epochsize = config.getint('training', 'epochsize')
    batchsize = config.getint('training', 'batchsize')

    # Unknown weight_init names fall back to Glorot-uniform initialisation.
    weight_init_fn = las.init.GlorotUniform()
    if weight_init == 'glorot':
        weight_init_fn = las.init.GlorotUniform()
    if weight_init == 'norm':
        weight_init_fn = las.init.Normal(0.1)
    if weight_init == 'uniform':
        weight_init_fn = las.init.Uniform()
    if weight_init == 'ortho':
        weight_init_fn = las.init.Orthogonal()

    train_subject_ids = read_data_split_file(config.get('training', 'train_subjects_file'))
    val_subject_ids = read_data_split_file(config.get('training', 'val_subjects_file'))
    test_subject_ids = read_data_split_file(config.get('training', 'test_subjects_file'))

    # Targets, subjects and video lengths are taken from stream 1 and reused
    # for stream 2.
    s1_data_matrix = s1_data['dataMatrix'].astype('float32')
    s2_data_matrix = s2_data['dataMatrix'].astype('float32')
    targets_vec = s1_data['targetsVec'].reshape((-1,))
    subjects_vec = s1_data['subjectsVec'].reshape((-1,))
    vidlen_vec = s1_data['videoLengthVec'].reshape((-1,))

    if matlab_target_offset:
        # shift the labels down by one when the config asks for it
        targets_vec -= 1

    s1_data_matrix = presplit_dataprocessing(s1_data_matrix, vidlen_vec, config, 'stream1', imagesize=s1_imagesize)
    s2_data_matrix = presplit_dataprocessing(s2_data_matrix, vidlen_vec, config, 'stream2', imagesize=s2_imagesize)

    s1_train_X, s1_train_y, s1_train_vidlens, s1_train_subjects, \
    s1_val_X, s1_val_y, s1_val_vidlens, s1_val_subjects, \
    s1_test_X, s1_test_y, s1_test_vidlens, s1_test_subjects = split_seq_data(s1_data_matrix, targets_vec, subjects_vec,
                                                                             vidlen_vec, train_subject_ids,
                                                                             val_subject_ids, test_subject_ids)

    s2_train_X, s2_train_y, s2_train_vidlens, s2_train_subjects, \
    s2_val_X, s2_val_y, s2_val_vidlens, s2_val_subjects, \
    s2_test_X, s2_test_y, s2_test_vidlens, s2_test_subjects = split_seq_data(s2_data_matrix, targets_vec, subjects_vec,
                                                                             vidlen_vec, train_subject_ids,
                                                                             val_subject_ids, test_subject_ids)

    s1_train_X, s1_val_X, s1_test_X = postsplit_datapreprocessing(s1_train_X, s1_val_X, s1_test_X, config, 'stream1')
    s2_train_X, s2_val_X, s2_test_X = postsplit_datapreprocessing(s2_train_X, s2_val_X, s2_test_X, config, 'stream2')

    ae1 = load_decoder(s1, s1_shape, s1_nonlinearities)
    ae2 = load_decoder(s2, s2_shape, s2_nonlinearities)

    # IMPT: the encoder was trained with Fortran ordered images, so to visualize
    # convert all the images to C order using reshape_images_order()
    # output = dbn.predict(test_X)
    # test_X = reshape_images_order(test_X, (26, 44))
    # output = reshape_images_order(output, (26, 44))
    # visualize_reconstruction(test_X[:36, :], output[:36, :], shape=(26, 44))

    inputs1 = T.tensor3('inputs1', dtype='float32')
    inputs2 = T.tensor3('inputs2', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.imatrix('targets')

    print('constructing end to end model...')
    network, l_fuse = adenet_v2_nodelta.create_model(ae1, ae2, (None, None, s1_inputdim), inputs1,
                                                     (None, None), mask,
                                                     (None, None, s2_inputdim), inputs2,
                                                     lstm_size, output_classes, fusiontype,
                                                     w_init_fn=weight_init_fn,
                                                     use_peepholes=use_peepholes)

    print_network(network)
    # draw_to_file(las.layers.get_all_layers(network), 'network.png')
    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = temporal_softmax_loss(predictions, targets, mask)
    updates = adam(cost, all_params, learning_rate=learning_rate)

    # `train` applies the parameter updates; `compute_train_cost` evaluates the
    # same (non-deterministic) cost without updating anything.
    train = theano.function(
        [inputs1, targets, mask, inputs2],
        cost, updates=updates, allow_input_downcast=True)
    compute_train_cost = theano.function([inputs1, targets, mask, inputs2],
                                         cost, allow_input_downcast=True)

    # Deterministic graph used for validation / test cost and predictions.
    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = temporal_softmax_loss(test_predictions, targets, mask)
    compute_test_cost = theano.function(
        [inputs1, targets, mask, inputs2], test_cost, allow_input_downcast=True)

    val_fn = theano.function([inputs1, mask, inputs2], test_predictions, allow_input_downcast=True)

    # Train for up to `num_epoch` epochs of `epochsize` minibatches each.
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    STRIP_SIZE = 3
    val_window = circular_list(validation_window)
    train_strip = np.zeros((STRIP_SIZE,))
    best_val = float('inf')
    best_cr = 0.0
    # Bound up front: they are only assigned when an epoch improves best_val.
    test_cr = None
    test_conf = None
    best_params = None

    datagen = gen_lstm_batch_random(s1_train_X, s1_train_y, s1_train_vidlens, batchsize=batchsize)
    integral_lens = compute_integral_len(s1_train_vidlens)

    # One batch covering the whole validation / test set each.
    val_datagen = gen_lstm_batch_random(s1_val_X, s1_val_y, s1_val_vidlens, batchsize=len(s1_val_vidlens))
    test_datagen = gen_lstm_batch_random(s1_test_X, s1_test_y, s1_test_vidlens, batchsize=len(s1_test_vidlens))

    # We'll use this "validation set" to periodically check progress
    X_val, y_val, mask_val, idxs_val = next(val_datagen)
    integral_lens_val = compute_integral_len(s1_val_vidlens)
    # stream-2 batch built from the same sample indexes as the stream-1 batch
    X_diff_val = gen_seq_batch_from_idx(s2_val_X, idxs_val, s1_val_vidlens, integral_lens_val, np.max(s1_val_vidlens))

    # we use the test set to check final classification rate
    X_test, y_test, mask_test, idxs_test = next(test_datagen)
    integral_lens_test = compute_integral_len(s1_test_vidlens)
    X_diff_test = gen_seq_batch_from_idx(s2_test_X, idxs_test, s1_test_vidlens, integral_lens_test, np.max(s1_test_vidlens))

    # Keep the per-sequence targets for evaluation; the cost function gets the
    # targets repeated along the mask's last axis.
    y_val_evaluate = y_val
    y_val = y_val.reshape((-1, 1)).repeat(mask_val.shape[-1], axis=-1)

    for epoch in range(num_epoch):
        time_start = time.time()
        for i in range(epochsize):
            X, y, m, batch_idxs = next(datagen)
            # repeat targets based on max sequence len
            y = y.reshape((-1, 1))
            y = y.repeat(m.shape[-1], axis=-1)
            X_diff = gen_seq_batch_from_idx(s2_train_X, batch_idxs,
                                            s1_train_vidlens, integral_lens, np.max(s1_train_vidlens))
            print_str = 'Epoch {} batch {}/{}: {} examples using adam'.format(
                epoch + 1, i + 1, epochsize, len(X))
            # progress line is overwritten in place via the trailing '\r'
            print(print_str, end='')
            sys.stdout.flush()
            train(X, y, m, X_diff)
            print('\r', end='')
        # training cost is measured on the last minibatch of the epoch only
        cost = compute_train_cost(X, y, m, X_diff)
        val_cost = compute_test_cost(X_val, y_val, mask_val, X_diff_val)
        cost_train.append(cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = cost
        val_window.push(val_cost)

        # gl: percentage by which the latest validation cost exceeds the best so far.
        # pk: how far the mean training cost of the strip is above its minimum (x1000).
        # NOTE(review): train_strip starts as zeros, so until STRIP_SIZE epochs
        # have run np.min(train_strip) is presumably 0 and pk/pq are not
        # meaningful; they are only printed, never used for decisions.
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        pk = 1000 * (np.sum(train_strip) / (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model2(X_val, y_val_evaluate, mask_val, X_diff_val, val_fn)
        class_rate.append(cr)

        if val_cost < best_val:
            # new best epoch: evaluate on the test set and snapshot the parameters
            best_val = val_cost
            best_cr = cr
            test_cr, test_conf = evaluate_model2(X_test, y_test, mask_test,
                                                 X_diff_test, val_fn)
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f}, Test CR= {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr, test_cr, time.time() - time_start))
            best_params = las.layers.get_all_param_values(network)
        else:
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr, time.time() - time_start))

        if epoch >= validation_window and early_stop2(val_window, best_val, validation_window):
            break

    if best_params is None:
        # Previously this case surfaced as an unbound-name error in the report
        # below; fail with an explicit message instead.
        raise RuntimeError('no epoch improved the validation cost; '
                           'there is no best model to report')

    print('Final Model')
    print('CR: {}, val loss: {}, Test CR: {}'.format(best_cr, best_val, test_cr))

    # plot confusion matrix
    table_str = plot_confusion_matrix(test_conf, output_classnames, fmt='pipe')
    print('confusion matrix: ')
    print(table_str)

    if 'save_plot' in options:
        prefix = options['save_plot']
        plot_validation_cost(cost_train, cost_val, savefilename='{}.validloss.png'.format(prefix))
        # appended, so repeated runs accumulate in the same file
        with open('{}.confmat.txt'.format(prefix), mode='a') as f:
            f.write(table_str)
            f.write('\n\n')

    if 'write_results' in options:
        print('writing results to {}'.format(options['write_results']))
        results_file = options['write_results']
        with open(results_file, mode='a') as f:
            f.write('{},{},{}\n'.format(test_cr, best_cr, best_val))

    if 'save_best' in options:
        print('saving best model...')
        # restore the snapshot taken at the best validation epoch before saving
        las.layers.set_all_param_values(network, best_params)
        save_model_params(network, options['save_best'])
        print('best model saved to {}'.format(options['save_best']))

コード例 #26

ファイルを表示

ファイル: evaluate_delta_features.py プロジェクト: konatasick/ip-avsr

def main():
    """Train an LSTM letter classifier on delta features and report validation results.

    The script loads ``data/allData_mouthROIs.mat``, splits it with
    ``create_split_index``, encodes the resized images with the function
    returned by ``compile_encoder``, derives delta features from the
    encodings and trains a single-layer LSTM followed by a 26-way softmax.
    The held-out split is used as the validation set. After training (at most
    ``NUM_EPOCHS`` epochs, with early stopping) the best classification rate,
    validation loss and confusion matrix are printed and the cost curves are
    plotted.
    """
    configure_theano()
    print('preprocessing dataset...')
    data = load_mat_file('data/allData_mouthROIs.mat')

    # create the necessary variable mappings
    data_matrix = data['dataMatrix']
    data_matrix_len = data_matrix.shape[0]
    targets_vec = data['targetsVec']
    vid_len_vec = data['videoLengthVec']
    iter_vec = data['iterVec']

    indexes = create_split_index(data_matrix_len, vid_len_vec, iter_vec)
    train_vidlen_vec, test_vidlen_vec = split_videolen(vid_len_vec, iter_vec)
    # sanity checks on the expected dataset layout
    assert len(train_vidlen_vec) == 520
    assert len(test_vidlen_vec) == 260
    assert np.sum(vid_len_vec) == data_matrix_len

    # split the data
    train_data = data_matrix[indexes == True]
    train_targets = targets_vec[indexes == True]
    train_targets = train_targets.reshape((len(train_targets), ))
    test_data = data_matrix[indexes == False]
    test_targets = targets_vec[indexes == False]
    test_targets = test_targets.reshape((len(test_targets), ))

    # indexes for a particular letter
    # idx = [i for i, elem in enumerate(test_targets) if elem == 20]

    # resize and normalize the inputs (train and held-out data are processed
    # independently with the same helpers)
    train_data_resized = resize_images(train_data).astype(np.float32)
    train_data_resized = normalize_input(train_data_resized, centralize=True)

    test_data_resized = resize_images(test_data).astype(np.float32)
    test_data_resized = normalize_input(test_data_resized, centralize=True)

    print('compute delta features and featurewise normalize...')
    encode_fn = compile_encoder()
    # keep only the last 100 feature columns of the concatenated features
    deltafeatures = concat_first_second_deltas(encode_fn(train_data_resized),
                                               train_vidlen_vec)[:, -100:]
    deltafeatures_val = concat_first_second_deltas(
        encode_fn(test_data_resized), test_vidlen_vec)[:, -100:]

    # normalize the validation features with the training statistics
    deltafeatures, mean, std = featurewise_normalize_sequence(deltafeatures)
    deltafeatures_val = (deltafeatures_val - mean) / std

    print('train delta features: {}'.format(deltafeatures.shape))
    print('validation delta features: {}'.format(deltafeatures_val.shape))

    gate_p, cell_p = generate_lstm_parameters()

    # create lstm
    input_var = T.tensor3('input', dtype='float32')
    mask_var = T.matrix('mask', dtype='uint8')
    target_var = T.ivector('target')
    # shared variable so the learning rate can be decayed during training
    lr = theano.shared(np.array(0.7, dtype=theano.config.floatX),
                       name='learning_rate')
    lr_decay = np.array(0.80, dtype=theano.config.floatX)

    l_input = InputLayer((None, None, 100), input_var, name='input')
    l_mask = InputLayer((None, None), mask_var, name='mask')
    l_lstm = LSTMLayer(
        l_input,
        250,
        # We need to specify a separate input for masks
        mask_input=l_mask,
        # Here, we supply the gate parameters for each gate
        ingate=gate_p,
        forgetgate=gate_p,
        cell=cell_p,
        outgate=gate_p,
        # We'll learn the initialization and use gradient clipping
        learn_init=True,
        grad_clipping=5.,
        name='lstm')
    # take index -1 along axis 1 of the LSTM output
    l_forward_slice1 = SliceLayer(l_lstm, -1, 1, name='slice1')

    # Now, we can apply feed-forward layers as usual.
    # We want the network to predict a classification for the sequence,
    # so the output layer has one softmax unit per class (26).
    network = DenseLayer(l_forward_slice1,
                         num_units=26,
                         nonlinearity=las.nonlinearities.softmax,
                         name='output')
    print_network(network)

    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = T.mean(
        las.objectives.categorical_crossentropy(predictions, target_var))
    updates = las.updates.adadelta(cost, all_params, learning_rate=lr)
    # updates = las.updates.adam(cost, all_params, learning_rate=lr)

    # `train` applies the updates; `compute_train_cost` only evaluates the cost
    train = theano.function([input_var, target_var, mask_var],
                            cost,
                            updates=updates,
                            allow_input_downcast=True)
    compute_train_cost = theano.function([input_var, target_var, mask_var],
                                         cost,
                                         allow_input_downcast=True)

    # deterministic graph used for validation cost and predictions
    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = T.mean(
        las.objectives.categorical_crossentropy(test_predictions, target_var))
    compute_test_cost = theano.function([input_var, target_var, mask_var],
                                        test_cost,
                                        allow_input_downcast=True)

    val_fn = theano.function([input_var, mask_var],
                             test_predictions,
                             allow_input_downcast=True)

    # Train for up to NUM_EPOCHS epochs of EPOCH_SIZE minibatches each.
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    NUM_EPOCHS = 40
    EPOCH_SIZE = 20
    BATCH_SIZE = 26
    STRIP_SIZE = 3
    VALIDATION_WINDOW = 4
    val_window = circular_list(VALIDATION_WINDOW)
    train_strip = np.zeros((STRIP_SIZE, ))
    best_val = float('inf')
    best_conf = None
    best_cr = 0.0

    # create train and eval loop
    data_gen = gen_lstm_batch_random(deltafeatures,
                                     train_targets,
                                     train_vidlen_vec,
                                     batchsize=BATCH_SIZE)
    # a single batch that covers the whole validation set
    data_gen_val = gen_lstm_batch_random(deltafeatures_val,
                                         test_targets,
                                         test_vidlen_vec,
                                         batchsize=len(test_vidlen_vec))

    # We'll use this "validation set" to periodically check progress
    X_val, y_val, mask_val, _ = next(data_gen_val)

    def early_stop(cost_window):
        """Return True when the costs in the window are strictly increasing.

        Windows with fewer than two entries never trigger a stop.
        """
        if len(cost_window) < 2:
            return False
        else:
            curr = cost_window[0]
            for idx, cost in enumerate(cost_window):
                # the first element only seeds `curr`; every later element
                # must be strictly larger than its predecessor
                if curr < cost or idx == 0:
                    curr = cost
                else:
                    return False
            return True

    for epoch in range(NUM_EPOCHS):
        time_start = time.time()
        for _batch_no in range(EPOCH_SIZE):
            X, y, m, _ = next(data_gen)
            train(X, y, m)
        # training cost is measured on the last minibatch of the epoch only
        cost = compute_train_cost(X, y, m)
        val_cost = compute_test_cost(X_val, y_val, mask_val)
        cost_train.append(cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = cost
        val_window.push(val_cost)

        # gl: percentage by which the latest validation cost exceeds the best so far.
        # pk: how far the mean training cost of the strip is above its minimum (x1000).
        # NOTE(review): train_strip starts as zeros, so for the first epochs
        # np.min(train_strip) is presumably 0 and pk/pq are not meaningful;
        # both are only printed.
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        pk = 1000 * (np.sum(train_strip) /
                     (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model(X_val, y_val, mask_val, val_fn)
        class_rate.append(cr)

        print(
            "Epoch {} train cost = {}, validation cost = {}, "
            "generalization loss = {:.3f}, GQ = {:.3f}, classification rate = {:.3f} ({:.1f}sec)"
            .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                    time.time() - time_start))

        if val_cost < best_val:
            best_val = val_cost
            best_conf = val_conf
            best_cr = cr

        if epoch >= VALIDATION_WINDOW and early_stop(val_window):
            break
        # learning rate decay: multiply by lr_decay after every epoch from the
        # tenth one (epoch index 9) onwards
        if epoch > 8:
            lr.set_value(lr.get_value() * lr_decay)

    letters = [
        'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
        'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'
    ]

    print('Best Model')
    print('classification rate: {}, validation loss: {}'.format(
        best_cr, best_val))
    print('confusion matrix: ')
    plot_confusion_matrix(best_conf, letters, fmt='grid')
    plot_validation_cost(cost_train, cost_val, class_rate)

コード例 #27

ファイルを表示

def main():
    """Train the end-to-end image + DCT fusion LSTM described by a config file.

    Workflow, as implemented below:

    1. Read the CLI options and the config file; CLI values override the
       ``training`` settings where both are consulted.
    2. Load the image and DCT ``.mat`` data, normalize them and split them
       with ``create_split_index``; the held-out split is used as the
       validation set.
    3. Load the pickled encoder, build the fused network and compile the
       Theano train / cost / prediction functions for the selected
       ``update_rule`` (``adadelta``, ``sgdm``, ``sgdnm`` or ``adam``).
    4. Train for at most ``num_epoch`` epochs with learning-rate decay and
       early stopping via ``early_stop2``.
    5. Print the best results and, if requested, plot them and append the
       cost / classification-rate histories to the ``write_results`` file.

    Raises:
        ValueError: if ``update_rule`` is not one of the supported rules.
    """
    configure_theano()
    options = parse_options()
    config_file = options['config']
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    print('CLI options: {}'.format(options.items()))

    print('Reading Config File: {}...'.format(config_file))
    print(config.items('data'))
    print(config.items('models'))
    print(config.items('training'))

    print('preprocessing dataset...')
    data = load_mat_file(config.get('data', 'images'))
    dct_data = load_mat_file(config.get('data', 'dct'))
    ae_pretrained = config.get('models', 'pretrained')
    ae_finetuned = config.get('models', 'finetuned')
    fusiontype = config.get('models', 'fusiontype')

    # capture training parameters
    update_rule = options['update_rule'] if 'update_rule' in options \
        else config.get('training', 'update_rule')
    learning_rate = float(options['learning_rate']) \
        if 'learning_rate' in options else config.getfloat('training', 'learning_rate')
    decay_rate = float(options['decay_rate']) \
        if 'decay_rate' in options else config.getfloat('training', 'decay_rate')
    decay_start = int(options['decay_start']) \
        if 'decay_start' in options else config.getint('training', 'decay_start')
    validation_window = int(options['validation_window']) \
        if 'validation_window' in options else config.getint('training', 'validation_window')
    t1 = int(options['t1']) if 't1' in options else config.getint('training', 't1')
    num_epoch = int(options['num_epoch']) \
        if 'num_epoch' in options else config.getint('training', 'num_epoch')
    weight_init = options['weight_init'] if 'weight_init' in options \
        else config.get('training', 'weight_init')
    use_peepholes = options['use_peepholes'] if 'use_peepholes' in options \
        else config.getboolean('training', 'use_peepholes')
    use_blstm = config.getboolean('training', 'use_blstm')
    use_finetuning = config.getboolean('training', 'use_finetuning')

    # Momentum settings are only read for the two SGD-with-momentum rules.
    uses_momentum = update_rule in ('sgdm', 'sgdnm')
    if uses_momentum:
        momentum = float(options['momentum']) \
            if 'momentum' in options else config.getfloat('training', 'momentum')
        momentum_schedule = options['momentum_schedule'] \
            if 'momentum_schedule' in options else config.get('training', 'momentum_schedule')
        mm_schedule = [float(m) for m in momentum_schedule.split(',')]

    # Unknown weight_init names fall back to orthogonal initialisation.
    weight_init_fn = las.init.Orthogonal()
    if weight_init == 'glorot':
        weight_init_fn = las.init.GlorotUniform()
    if weight_init == 'norm':
        weight_init_fn = las.init.Normal(0.1)
    if weight_init == 'uniform':
        weight_init_fn = las.init.Uniform()
    if weight_init == 'ortho':
        weight_init_fn = las.init.Orthogonal()

    # create the necessary variable mappings
    data_matrix = data['dataMatrix'].astype('float32')
    data_matrix_len = data_matrix.shape[0]
    targets_vec = data['targetsVec']
    vid_len_vec = data['videoLengthVec']
    iter_vec = data['iterVec']
    dct_feats = dct_data['dctFeatures'].astype('float32')

    print('samplewise normalize images...')
    data_matrix = normalize_input(data_matrix, True)
    # mean remove
    # dct_feats = dct_feats[:, 0:30]
    # dct_feats = sequencewise_mean_image_subtraction(dct_feats, vid_len_vec.reshape((-1,)))

    indexes = create_split_index(data_matrix_len, vid_len_vec, iter_vec)
    train_vidlen_vec, test_vidlen_vec = split_videolen(vid_len_vec, iter_vec)
    # sanity checks on the expected dataset layout
    assert len(train_vidlen_vec) == 520
    assert len(test_vidlen_vec) == 260
    assert np.sum(vid_len_vec) == data_matrix_len

    # split the data
    train_data = data_matrix[indexes == True]
    train_targets = targets_vec[indexes == True]
    train_targets = train_targets.reshape((len(train_targets), ))
    test_data = data_matrix[indexes == False]
    test_targets = targets_vec[indexes == False]
    test_targets = test_targets.reshape((len(test_targets), ))

    # split the dct features; the held-out part is normalized with the
    # training statistics
    train_dct = dct_feats[indexes == True].astype(np.float32)
    test_dct = dct_feats[indexes == False].astype(np.float32)
    train_dct, dct_mean, dct_std = featurewise_normalize_sequence(train_dct)
    test_dct = (test_dct - dct_mean) / dct_std

    # The three flags below are hard-coded switches: with these values the
    # script only loads the already fine-tuned encoder.
    finetune = False
    if finetune:
        print('fine-tuning...')
        dbn = load_dbn(ae_pretrained)
        dbn.initialize()
        dbn.fit(train_data, train_data)
        res = dbn.predict(test_data)
        # print(res.shape)
        visualize_reconstruction(test_data[300:336], res[300:336])

    save = False
    if save:
        # `with` closes the file even if pickling fails
        with open(ae_finetuned, 'wb') as dbn_file:
            pickle.dump(dbn, dbn_file)

    load = True
    if load:
        print('loading pre-trained encoding layers...')
        # NOTE(review): unpickling executes arbitrary code from the file; the
        # path comes from the config, so only point it at trusted files.
        with open(ae_finetuned, 'rb') as dbn_file:
            dbn = pickle.load(dbn_file)
        dbn.initialize()
        # recon = dbn.predict(test_data)
        # visualize_reconstruction(test_data[300:364], recon[300:364])
        # exit()

    load_convae = False
    if load_convae:
        print('loading pre-trained convolutional autoencoder...')
        encoder = load_model('models/conv_encoder_norm.dat')
        inputs = las.layers.get_all_layers(encoder)[0].input_var
    else:
        inputs = T.tensor3('inputs', dtype='float32')
    window = T.iscalar('theta')
    dct = T.tensor3('dct', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    # targets = T.ivector('targets')
    targets = T.imatrix('targets')
    # shared variables so learning rate / momentum can be changed while training
    lr = theano.shared(np.array(learning_rate, dtype=theano.config.floatX),
                       name='learning_rate')
    lr_decay = np.array(decay_rate, dtype=theano.config.floatX)

    if uses_momentum:
        mm = theano.shared(np.array(momentum, dtype=theano.config.floatX),
                           name='momentum')

    print('constructing end to end model...')

    # Both variants take the same arguments; use_blstm selects the module.
    if use_blstm:
        network, l_fuse = adenet_v2.create_model(dbn, (None, None, 1200),
                                                 inputs, (None, None),
                                                 mask, (None, None, 90),
                                                 dct,
                                                 250,
                                                 window,
                                                 26,
                                                 fusiontype,
                                                 w_init_fn=weight_init_fn,
                                                 use_peepholes=use_peepholes)
    else:
        network, l_fuse = adenet_v2_3.create_model(dbn, (None, None, 1200),
                                                   inputs, (None, None),
                                                   mask, (None, None, 90),
                                                   dct,
                                                   250,
                                                   window,
                                                   26,
                                                   fusiontype,
                                                   w_init_fn=weight_init_fn,
                                                   use_peepholes=use_peepholes)

    print_network(network)
    draw_to_file(las.layers.get_all_layers(network), 'network.png')
    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    # cost = T.mean(las.objectives.categorical_crossentropy(predictions, targets))
    cost = temporal_softmax_loss(predictions, targets, mask)
    if update_rule == 'adadelta':
        updates = las.updates.adadelta(cost, all_params, learning_rate=lr)
    elif update_rule == 'sgdm':
        updates = las.updates.sgd(cost, all_params, learning_rate=lr)
        updates = las.updates.apply_momentum(updates, all_params, momentum=mm)
    elif update_rule == 'sgdnm':
        updates = las.updates.sgd(cost, all_params, learning_rate=lr)
        updates = las.updates.apply_nesterov_momentum(updates,
                                                      all_params,
                                                      momentum=mm)
    elif update_rule == 'adam':
        # adam deliberately runs with its default hyper-parameters
        updates = las.updates.adam(cost, all_params)
    else:
        # Without this, an unsupported rule left `updates` unbound and failed
        # with an obscure error at theano.function() below.
        raise ValueError('unsupported update_rule: {!r}'.format(update_rule))

    # `train` applies the updates; `compute_train_cost` only evaluates the cost
    train = theano.function([inputs, targets, mask, dct, window],
                            cost,
                            updates=updates,
                            allow_input_downcast=True)
    compute_train_cost = theano.function([inputs, targets, mask, dct, window],
                                         cost,
                                         allow_input_downcast=True)

    # deterministic graph used for validation cost and predictions
    test_predictions = las.layers.get_output(network, deterministic=True)
    # test_cost = T.mean(las.objectives.categorical_crossentropy(test_predictions, targets))
    test_cost = temporal_softmax_loss(test_predictions, targets, mask)
    compute_test_cost = theano.function([inputs, targets, mask, dct, window],
                                        test_cost,
                                        allow_input_downcast=True)

    val_fn = theano.function([inputs, mask, dct, window],
                             test_predictions,
                             allow_input_downcast=True)

    # Train for up to `num_epoch` epochs of EPOCH_SIZE minibatches each.
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    EPOCH_SIZE = 20
    BATCH_SIZE = 26
    WINDOW_SIZE = 9
    STRIP_SIZE = 3
    val_window = circular_list(validation_window)
    train_strip = np.zeros((STRIP_SIZE, ))
    best_val = float('inf')
    best_conf = None
    best_cr = 0.0

    datagen = gen_lstm_batch_random(train_data,
                                    train_targets,
                                    train_vidlen_vec,
                                    batchsize=BATCH_SIZE)
    # a single batch that covers the whole validation set
    val_datagen = gen_lstm_batch_random(test_data,
                                        test_targets,
                                        test_vidlen_vec,
                                        batchsize=len(test_vidlen_vec))
    integral_lens = compute_integral_len(train_vidlen_vec)

    # We'll use this "validation set" to periodically check progress
    X_val, y_val, mask_val, idxs_val = next(val_datagen)
    integral_lens_val = compute_integral_len(test_vidlen_vec)
    # DCT batch built from the same sample indexes as the image batch
    dct_val = gen_seq_batch_from_idx(test_dct, idxs_val,
                                     test_vidlen_vec, integral_lens_val,
                                     np.max(test_vidlen_vec))

    # Keep the per-sequence targets for evaluation; the cost function gets the
    # targets repeated along the mask's last axis.
    y_val_evaluate = y_val
    y_val = y_val.reshape((-1, 1)).repeat(mask_val.shape[-1], axis=-1)

    for epoch in range(num_epoch):
        time_start = time.time()
        for i in range(EPOCH_SIZE):
            X, y, m, batch_idxs = next(datagen)
            # repeat targets based on max sequence len
            y = y.reshape((-1, 1))
            y = y.repeat(m.shape[-1], axis=-1)
            d = gen_seq_batch_from_idx(train_dct, batch_idxs, train_vidlen_vec,
                                       integral_lens, np.max(train_vidlen_vec))
            if update_rule == 'adam':
                print_str = 'Epoch {} batch {}/{}: {} examples with {} using default params'.format(
                    epoch + 1, i + 1, EPOCH_SIZE, len(X), update_rule)
            elif update_rule == 'adadelta':
                print_str = 'Epoch {} batch {}/{}: {} examples at learning rate = {:.4f} with {}'.format(
                    epoch + 1, i + 1, EPOCH_SIZE, len(X),
                    float(lr.get_value()), update_rule)
            else:
                # only the momentum rules remain; others were rejected above
                print_str = 'Epoch {} batch {}/{}: {} examples at learning rate = {:.4f}, ' \
                            'momentum = {:.4f} with {}'.format(
                    epoch + 1, i + 1, EPOCH_SIZE, len(X), float(lr.get_value()), float(mm.get_value()), update_rule)
            # progress line is overwritten in place via the trailing '\r'
            print(print_str, end='')
            sys.stdout.flush()
            train(X, y, m, d, WINDOW_SIZE)
            print('\r', end='')
        # training cost is measured on the last minibatch of the epoch only
        cost = compute_train_cost(X, y, m, d, WINDOW_SIZE)
        val_cost = compute_test_cost(X_val, y_val, mask_val, dct_val,
                                     WINDOW_SIZE)
        cost_train.append(cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = cost
        val_window.push(val_cost)

        # gl: percentage by which the latest validation cost exceeds the best so far.
        # pk: how far the mean training cost of the strip is above its minimum (x1000).
        # NOTE(review): train_strip starts as zeros, so for the first epochs
        # np.min(train_strip) is presumably 0 and pk/pq are not meaningful;
        # both are only printed.
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        pk = 1000 * (np.sum(train_strip) /
                     (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model2(X_val, y_val_evaluate, mask_val,
                                       dct_val, WINDOW_SIZE, val_fn)
        class_rate.append(cr)

        print(
            "Epoch {} train cost = {}, validation cost = {}, "
            "generalization loss = {:.3f}, GQ = {:.3f}, classification rate = {:.3f} ({:.1f}sec)"
            .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                    time.time() - time_start))

        if val_cost < best_val:
            best_val = val_cost
            best_conf = val_conf
            best_cr = cr
        else:
            # No improvement: from epoch t1 on, the momentum rules shrink the
            # learning rate (floored at 0.001) and advance the momentum schedule.
            if epoch >= t1 and uses_momentum:
                lr.set_value(max(lr.get_value() * lr_decay, 0.001))
                if mm_schedule:
                    mm.set_value(mm_schedule.pop(0))

        if epoch >= validation_window and early_stop2(val_window, best_val,
                                                      validation_window):
            break

        # learning rate decay (applied in addition to the decay above)
        if epoch + 1 >= decay_start:
            lr.set_value(lr.get_value() * lr_decay)

    letters = [
        'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
        'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'
    ]

    print('Best Model')
    print('classification rate: {}, validation loss: {}'.format(
        best_cr, best_val))
    if fusiontype == 'adasum':
        adascale_param = las.layers.get_all_param_values(l_fuse,
                                                         scaling_param=True)
        print("final scaling params: {}".format(adascale_param))
    print('confusion matrix: ')
    # NOTE(review): unlike the other options this key is read unconditionally;
    # presumably parse_options always provides 'no_plot' - confirm.
    if not options['no_plot']:
        plot_confusion_matrix(best_conf, letters, fmt='latex')
        plot_validation_cost(cost_train, cost_val, class_rate,
                             'e2e_valid_cost')

    if 'write_results' in options:
        results_file = options['write_results']
        # appended: one settings line, three history lines and a summary line
        with open(results_file, mode='a') as f:
            f.write('{},{},{},{},{}\n'.format(validation_window, weight_init,
                                              use_peepholes, use_blstm,
                                              use_finetuning))

            s = ','.join([str(v) for v in cost_train])
            f.write('{}\n'.format(s))

            s = ','.join([str(v) for v in cost_val])
            f.write('{}\n'.format(s))

            s = ','.join([str(v) for v in class_rate])
            f.write('{}\n'.format(s))

            f.write('{},{},{}\n'.format(fusiontype, best_cr, best_val))

コード例 #28

ファイルを表示

ファイル: evaluate_delta_features.py プロジェクト: konatasick/ip-avsr

def train_deltanet(save=True):
    """Train the delta-feature LSTM classifier and report the best epoch.

    Loads frames from ``data/allData_mouthROIs.mat``, splits them into a
    train and a test partition with ``create_split_index``, builds the
    network with ``deltanet.create_model`` on top of a finetuned DBN and
    trains it with adadelta for at most ``NUM_EPOCHS`` epochs. The test
    partition doubles as the validation set: it is evaluated after every
    epoch, and the epoch with the lowest validation cost supplies the
    classification rate and confusion matrix that are printed and plotted
    at the end.

    Args:
        save: When true (the default), the encoder extracted from the
            network is pickled to ``models/end2end_encoder.dat`` each time a
            new best epoch reaches a classification rate above 0.55. When
            false, no encoder file is written.

    Returns:
        None. Results are printed and plotted.
    """
    configure_theano()
    print('preprocessing dataset...')
    data = load_mat_file('data/allData_mouthROIs.mat')

    # create the necessary variable mappings
    data_matrix = data['dataMatrix']
    data_matrix_len = data_matrix.shape[0]
    targets_vec = data['targetsVec']
    vid_len_vec = data['videoLengthVec']
    iter_vec = data['iterVec']

    indexes = create_split_index(data_matrix_len, vid_len_vec, iter_vec)
    train_vidlen_vec, test_vidlen_vec = split_videolen(vid_len_vec, iter_vec)
    # sanity checks: the split must yield the expected number of videos and
    # the per-video lengths must add up to the number of frames
    assert len(train_vidlen_vec) == 520
    assert len(test_vidlen_vec) == 260
    assert np.sum(vid_len_vec) == data_matrix_len

    # split the data; `indexes` is compared elementwise, so it is used as a
    # mask selecting the training rows (True) and the test rows (False)
    train_data = data_matrix[indexes == True]
    train_targets = targets_vec[indexes == True]
    train_targets = train_targets.reshape((len(train_targets), ))
    test_data = data_matrix[indexes == False]
    test_targets = targets_vec[indexes == False]
    test_targets = test_targets.reshape((len(test_targets), ))

    # indexes for a particular letter
    # idx = [i for i, elem in enumerate(test_targets) if elem == 20]

    # resize the input data to 40 x 30
    train_data_resized = resize_images(train_data).astype(np.float32)

    # normalize the inputs [0 - 1]
    train_data_resized = normalize_input(train_data_resized, centralize=True)

    test_data_resized = resize_images(test_data).astype(np.float32)
    test_data_resized = normalize_input(test_data_resized, centralize=True)

    # symbolic inputs of the compiled functions
    input_var = T.tensor3('input', dtype='float32')
    mask_var = T.matrix('mask', dtype='uint8')
    window_var = T.iscalar('window')
    target_var = T.ivector('target')
    # the learning rate is a shared variable so it can be decayed in place
    lr = theano.shared(np.array(1.0, dtype=theano.config.floatX),
                       name='learning_rate')
    lr_decay = np.array(0.90, dtype=theano.config.floatX)

    dbn = load_finetuned_dbn('models/avletters_ae_finetune.dat')
    network = deltanet.create_model(dbn, (None, None, 1200), input_var,
                                    (None, None), mask_var, 250, window_var)
    print_network(network)

    print('compiling model...')
    # training graph (deterministic=False)
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = T.mean(
        las.objectives.categorical_crossentropy(predictions, target_var))
    updates = las.updates.adadelta(cost, all_params, learning_rate=lr)
    # updates = las.updates.adam(cost, all_params, learning_rate=lr)

    train = theano.function([input_var, target_var, mask_var, window_var],
                            cost,
                            updates=updates,
                            allow_input_downcast=True)
    # same cost expression, but compiled without the parameter updates
    compute_train_cost = theano.function(
        [input_var, target_var, mask_var, window_var],
        cost,
        allow_input_downcast=True)

    # evaluation graph (deterministic=True)
    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = T.mean(
        las.objectives.categorical_crossentropy(test_predictions, target_var))
    compute_test_cost = theano.function(
        [input_var, target_var, mask_var, window_var],
        test_cost,
        allow_input_downcast=True)

    val_fn = theano.function([input_var, mask_var, window_var],
                             test_predictions,
                             allow_input_downcast=True)

    # Train for at most NUM_EPOCHS epochs of EPOCH_SIZE minibatches each
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    NUM_EPOCHS = 40
    EPOCH_SIZE = 20
    BATCH_SIZE = 26
    WINDOW_SIZE = 9
    STRIP_SIZE = 3
    VALIDATION_WINDOW = 4
    # most recent validation costs, consulted by early_stop()
    val_window = circular_list(VALIDATION_WINDOW)
    # training costs of the last STRIP_SIZE epochs (zero until filled)
    train_strip = np.zeros((STRIP_SIZE, ))
    best_val = float('inf')
    best_conf = None
    best_cr = 0.0

    # create train and eval loop
    data_gen = gen_lstm_batch_random(train_data_resized,
                                     train_targets,
                                     train_vidlen_vec,
                                     batchsize=BATCH_SIZE)
    data_gen_val = gen_lstm_batch_random(test_data_resized,
                                         test_targets,
                                         test_vidlen_vec,
                                         batchsize=len(test_vidlen_vec))

    # We'll use this "validation set" to periodically check progress; the
    # batch size equals the number of test videos, so one batch is drawn once
    X_val, y_val, mask_val, _ = next(data_gen_val)

    def early_stop(cost_window):
        """Return True when every cost in the window exceeds the one before.

        A window with fewer than two entries never triggers a stop. The
        order in which entries are visited is whatever iterating over
        ``cost_window`` yields.
        """
        if len(cost_window) < 2:
            return False
        else:
            curr = cost_window[0]
            for idx, cost in enumerate(cost_window):
                if curr < cost or idx == 0:
                    curr = cost
                else:
                    return False
            return True

    for epoch in range(NUM_EPOCHS):
        time_start = time.time()
        for i in range(EPOCH_SIZE):
            X, y, m, _ = next(data_gen)
            train(X, y, m, WINDOW_SIZE)
        # the reported training cost is that of the last minibatch only
        cost = compute_train_cost(X, y, m, WINDOW_SIZE)
        val_cost = compute_test_cost(X_val, y_val, mask_val, WINDOW_SIZE)
        cost_train.append(cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = cost
        val_window.push(val_cost)

        # gl: relative increase of the current validation cost over the best
        # one so far; pk: how far the mean of the training strip lies above
        # its minimum; pq: their ratio.
        # NOTE(review): train_strip still contains zeros during the first
        # STRIP_SIZE - 1 epochs, so the division below is by zero there.
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        pk = 1000 * (np.sum(train_strip) /
                     (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model1(X_val, y_val, mask_val, WINDOW_SIZE,
                                       val_fn)
        class_rate.append(cr)

        print(
            "Epoch {} train cost = {}, validation cost = {}, "
            "generalization loss = {:.3f}, GQ = {:.3f}, classification rate = {:.3f} ({:.1f}sec)"
            .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                    time.time() - time_start))

        # remember the epoch with the lowest validation cost
        if val_cost < best_val:
            best_val = val_cost
            best_conf = val_conf
            best_cr = cr
            if save and best_cr > 0.55:
                print('saving a good encoder...')
                encoder = extract_encoder(network, (None, 1200), 2, 6)
                # use a context manager so the file is flushed and closed
                with open('models/end2end_encoder.dat', 'wb') as f:
                    pickle.dump(encoder, f)

        if epoch >= VALIDATION_WINDOW and early_stop(val_window):
            break
        # learning rate decay: shrink by lr_decay every epoch once the epoch
        # index exceeds 12
        if epoch > 12:
            lr.set_value(lr.get_value() * lr_decay)

    # class labels used for the confusion matrix axes
    letters = [
        'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
        'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'
    ]

    print('Best Model')
    print('classification rate: {}, validation loss: {}'.format(
        best_cr, best_val))
    print('confusion matrix: ')
    plot_confusion_matrix(best_conf, letters, fmt='grid')
    plot_validation_cost(cost_train, cost_val, class_rate)

コード例 #29

ファイルを表示

ファイル: trimodal.py プロジェクト: lzuwei/ip-avsr

def main():
    """Train the three-input (image, DCT, diff-image) model and report results.

    Reads ``config/trimodal.ini``, loads the image and DCT feature files it
    names, splits the samples by subject id using ``data/train_val.txt`` and
    ``data/test.txt``, builds the network with ``adenet_v5.create_model`` and
    trains it with adadelta for at most ``NUM_EPOCHS`` epochs. The test
    subjects double as the validation set; the epoch with the lowest
    validation cost supplies the classification rate and confusion matrix
    that are printed and plotted at the end.

    NOTE(review): ``ae`` is only bound when ``do_finetune`` or
    ``load_finetune`` is enabled, and ``ae_diff`` only when
    ``load_finetune_diff`` is enabled. With those flags off, the
    ``save_finetune`` branch and the model construction raise ``NameError``.

    Returns:
        None. Results are printed and plotted.
    """
    configure_theano()
    config_file = 'config/trimodal.ini'
    print('loading config file: {}'.format(config_file))
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    print('Reading Config File: {}...'.format(config_file))
    print(config.items('data'))
    print(config.items('models'))
    print(config.items('training'))

    print('preprocessing dataset...')
    data = load_mat_file(config.get('data', 'images'))
    dct_data = load_mat_file(config.get('data', 'dct'))
    ae_pretrained = config.get('models', 'pretrained')
    ae_finetuned = config.get('models', 'finetuned')
    ae_finetuned_diff = config.get('models', 'finetuned_diff')
    use_adascale = config.getboolean('models', 'use_adascale')
    learning_rate = float(config.get('training', 'learning_rate'))
    decay_rate = float(config.get('training', 'decay_rate'))
    decay_start = int(config.get('training', 'decay_start'))
    do_finetune = config.getboolean('training', 'do_finetune')
    save_finetune = config.getboolean('training', 'save_finetune')
    load_finetune = config.getboolean('training', 'load_finetune')
    load_finetune_diff = config.getboolean('training', 'load_finetune_diff')

    # 53 subjects, 70 utterances, 5 view angles
    # s[x]_v[y]_u[z].mp4
    # resized, height, width = (26, 44)
    # ['dataMatrix', 'targetH', 'targetsPerVideoVec', 'videoLengthVec', '__header__', 'targetsVec',
    # '__globals__', 'iterVec', 'filenamesVec', 'dataMatrixCells', 'subjectsVec', 'targetW', '__version__']

    print(data.keys())
    X = data['dataMatrix'].astype('float32')
    y = data['targetsVec'].astype('int32')
    y = y.reshape((len(y),))
    dct_feats = dct_data['dctFeatures'].astype('float32')
    uniques = np.unique(y)
    print('number of classifications: {}'.format(len(uniques)))
    subjects = data['subjectsVec'].astype('int')
    subjects = subjects.reshape((len(subjects),))
    video_lens = data['videoLengthVec'].astype('int')
    video_lens = video_lens.reshape((len(video_lens),))

    # X = reorder_data(X, (26, 44), 'f', 'c')
    # print('performing sequencewise mean image removal...')
    # X = sequencewise_mean_image_subtraction(X, video_lens)
    # visualize_images(X[550:650], (26, 44))
    X_diff = compute_diff_images(X, video_lens)

    # mean remove dct features
    dct_feats = sequencewise_mean_image_subtraction(dct_feats, video_lens)

    train_subject_ids = read_data_split_file('data/train_val.txt')
    test_subject_ids = read_data_split_file('data/test.txt')
    print(train_subject_ids)
    print(test_subject_ids)
    train_X, train_y, train_dct, train_X_diff, train_vidlens, train_subjects, \
    test_X, test_y, test_dct, test_X_diff, test_vidlens, test_subjects = \
        split_data(X, y, dct_feats, X_diff, subjects, video_lens, train_subject_ids, test_subject_ids)

    # the two partitions must account for every sample
    assert train_X.shape[0] + test_X.shape[0] == len(X)
    assert train_y.shape[0] + test_y.shape[0] == len(y)
    assert train_vidlens.shape[0] + test_vidlens.shape[0] == len(video_lens)
    assert train_subjects.shape[0] + test_subjects.shape[0] == len(subjects)

    train_X = normalize_input(train_X, centralize=True)
    test_X = normalize_input(test_X, centralize=True)

    # featurewise normalize dct features; the test set reuses the statistics
    # computed on the training set
    train_dct, dct_mean, dct_std = featurewise_normalize_sequence(train_dct)
    test_dct = (test_dct - dct_mean) / dct_std

    if do_finetune:
        print('performing finetuning on pretrained encoder: {}'.format(ae_pretrained))
        ae = load_dbn(ae_pretrained)
        ae.initialize()
        ae.fit(train_X, train_X)

    if save_finetune:
        print('saving finetuned encoder: {}...'.format(ae_finetuned))
        # context manager guarantees the file is flushed and closed
        with open(ae_finetuned, 'wb') as f:
            pickle.dump(ae, f)

    # NOTE: pickle.load can execute arbitrary code; the encoder paths come
    # from the config file and must point to trusted files only.
    if load_finetune:
        print('loading finetuned encoder: {}...'.format(ae_finetuned))
        with open(ae_finetuned, 'rb') as f:
            ae = pickle.load(f)
        ae.initialize()

    if load_finetune_diff:
        print('loading finetuned encoder: {}...'.format(ae_finetuned_diff))
        with open(ae_finetuned_diff, 'rb') as f:
            ae_diff = pickle.load(f)
        ae_diff.initialize()

    # IMPT: the encoder was trained with fortan ordered images, so to visualize
    # convert all the images to C order using reshape_images_order()
    # output = dbn.predict(test_X)
    # test_X = reshape_images_order(test_X, (26, 44))
    # output = reshape_images_order(output, (26, 44))
    # visualize_reconstruction(test_X[:36, :], output[:36, :], shape=(26, 44))

    # symbolic inputs of the compiled functions
    window = T.iscalar('theta')
    dct = T.tensor3('dct', dtype='float32')
    inputs = T.tensor3('inputs', dtype='float32')
    inputs_diff = T.tensor3('inputs_diff', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.ivector('targets')
    # the learning rate is a shared variable so it can be decayed in place
    lr = theano.shared(np.array(learning_rate, dtype=theano.config.floatX), name='learning_rate')
    lr_decay = np.array(decay_rate, dtype=theano.config.floatX)

    print('constructing end to end model...')
    # network = create_end_to_end_model(dbn, (None, None, 1144), inputs,
    #                                   (None, None), mask, 250, window)

    network, adascale = adenet_v5.create_model(ae, ae_diff, (None, None, 1144), inputs,
                                               (None, None), mask,
                                               (None, None, 90), dct,
                                               (None, None, 1144), inputs_diff,
                                               250, window, 10, use_adascale)

    print_network(network)
    print('compiling model...')
    # training graph (deterministic=False)
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = T.mean(las.objectives.categorical_crossentropy(predictions, targets))
    updates = adadelta(cost, all_params, learning_rate=lr)
    # updates = adagrad(cost, all_params, learning_rate=lr)

    # optional max-norm constraint on the weights; disabled by default
    use_max_constraint = False
    if use_max_constraint:
        MAX_NORM = 4
        for param in las.layers.get_all_params(network, regularizable=True):
            if param.ndim > 1:  # only apply to dimensions larger than 1, exclude biases
                updates[param] = norm_constraint(param, MAX_NORM * las.utils.compute_norms(param.get_value()).mean())

    train = theano.function(
        [inputs, targets, mask, dct, inputs_diff, window],
        cost, updates=updates, allow_input_downcast=True)
    # same cost expression, but compiled without the parameter updates
    compute_train_cost = theano.function([inputs, targets, mask, dct, inputs_diff, window],
                                         cost, allow_input_downcast=True)

    # evaluation graph (deterministic=True)
    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = T.mean(las.objectives.categorical_crossentropy(test_predictions, targets))
    compute_test_cost = theano.function(
        [inputs, targets, mask, dct, inputs_diff, window], test_cost, allow_input_downcast=True)

    val_fn = theano.function([inputs, mask, dct, inputs_diff, window], test_predictions, allow_input_downcast=True)

    # Train for at most NUM_EPOCHS epochs of EPOCH_SIZE minibatches each
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    NUM_EPOCHS = 30
    EPOCH_SIZE = 120
    BATCH_SIZE = 10
    WINDOW_SIZE = 9
    STRIP_SIZE = 3
    VALIDATION_WINDOW = 4
    # most recent validation costs, consulted by early_stop()
    val_window = circular_list(VALIDATION_WINDOW)
    # training costs of the last STRIP_SIZE epochs (zero until filled)
    train_strip = np.zeros((STRIP_SIZE,))
    best_val = float('inf')
    best_conf = None
    best_cr = 0.0

    datagen = gen_lstm_batch_random(train_X, train_y, train_vidlens, batchsize=BATCH_SIZE)
    val_datagen = gen_lstm_batch_random(test_X, test_y, test_vidlens,
                                        batchsize=len(test_vidlens))
    integral_lens = compute_integral_len(train_vidlens)

    # We'll use this "validation set" to periodically check progress; the
    # DCT and diff-image batches are built from the same video indexes
    X_val, y_val, mask_val, idxs_val = next(val_datagen)
    integral_lens_val = compute_integral_len(test_vidlens)
    dct_val = gen_seq_batch_from_idx(test_dct, idxs_val, test_vidlens, integral_lens_val, np.max(test_vidlens))
    X_diff_val = gen_seq_batch_from_idx(test_X_diff, idxs_val, test_vidlens, integral_lens_val, np.max(test_vidlens))

    def early_stop(cost_window):
        """Return True when every cost in the window exceeds the one before.

        A window with fewer than two entries never triggers a stop. The
        order in which entries are visited is whatever iterating over
        ``cost_window`` yields.
        """
        if len(cost_window) < 2:
            return False
        else:
            curr = cost_window[0]
            for idx, cost in enumerate(cost_window):
                if curr < cost or idx == 0:
                    curr = cost
                else:
                    return False
            return True

    for epoch in range(NUM_EPOCHS):
        time_start = time.time()
        for i in range(EPOCH_SIZE):
            X, y, m, batch_idxs = next(datagen)
            d = gen_seq_batch_from_idx(train_dct, batch_idxs,
                                       train_vidlens, integral_lens, np.max(train_vidlens))
            X_diff = gen_seq_batch_from_idx(train_X_diff, batch_idxs,
                                            train_vidlens, integral_lens, np.max(train_vidlens))
            # progress line, overwritten in place via the trailing '\r'
            print_str = 'Epoch {} batch {}/{}: {} examples at learning rate = {:.4f}'.format(
                epoch + 1, i + 1, EPOCH_SIZE, len(X), float(lr.get_value()))
            print(print_str, end='')
            sys.stdout.flush()
            train(X, y, m, d, X_diff, WINDOW_SIZE)
            print('\r', end='')
        # the reported training cost is that of the last minibatch only
        cost = compute_train_cost(X, y, m, d, X_diff, WINDOW_SIZE)
        val_cost = compute_test_cost(X_val, y_val, mask_val, dct_val, X_diff_val, WINDOW_SIZE)
        cost_train.append(cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = cost
        val_window.push(val_cost)

        # gl: relative increase of the current validation cost over the best
        # one so far; pk: how far the mean of the training strip lies above
        # its minimum; pq: their ratio.
        # NOTE(review): train_strip still contains zeros during the first
        # STRIP_SIZE - 1 epochs, so the division below is by zero there.
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        pk = 1000 * (np.sum(train_strip) / (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model(X_val, y_val, mask_val, dct_val, X_diff_val, WINDOW_SIZE, val_fn)
        class_rate.append(cr)

        print("Epoch {} train cost = {}, validation cost = {}, "
              "generalization loss = {:.3f}, GQ = {:.3f}, classification rate = {:.3f} ({:.1f}sec)"
              .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr, time.time() - time_start))

        # remember the epoch with the lowest validation cost
        if val_cost < best_val:
            best_val = val_cost
            best_conf = val_conf
            best_cr = cr
            if use_adascale:
                adascale_param = las.layers.get_all_param_values(adascale, scaling_param=True)

        if epoch >= VALIDATION_WINDOW and early_stop(val_window):
            break

        # learning rate decay: shrink by lr_decay every epoch from epoch
        # number `decay_start` (1-based) onwards
        if epoch >= decay_start - 1:
            lr.set_value(lr.get_value() * lr_decay)

    # class labels used for the confusion matrix axes
    phrases = ['p1', 'p2', 'p3', 'p4', 'p5', 'p6', 'p7', 'p8', 'p9', 'p10']

    print('Final Model')
    print('classification rate: {}, validation loss: {}'.format(best_cr, best_val))
    if use_adascale:
        print("final scaling params: {}".format(adascale_param))
    print('confusion matrix: ')
    plot_confusion_matrix(best_conf, phrases, fmt='grid')
    plot_validation_cost(cost_train, cost_val, class_rate, savefilename='valid_cost')

コード例 #30

ファイルを表示

ファイル: audio_visual_runner.py プロジェクト: behtak/ip-avsr

def main():
    """Train the two-stream (visual + audio) LSTM model and report results.

    Settings come from the config file named by the ``config`` CLI option;
    several of them can be overridden by further CLI options. The function
    builds one pretrained substream per modality with
    ``avnet.create_pretrained_substream``, fuses them with
    ``avnet.create_model``, trains with adam on a temporal softmax loss and
    stops early via ``early_stop2``. Each time the validation cost improves,
    the test set is evaluated as well. Final figures are printed, plotted
    and, when the ``write_results`` option is present, appended to that file.

    NOTE(review): ``test_cr``, ``test_conf`` and ``adascale_param`` are only
    bound once an epoch improves on ``best_val``; if that never happens the
    final report raises ``NameError``.

    Returns:
        None.
    """
    configure_theano()
    options = parse_options()
    config_file = options['config']
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    print('CLI options: {}'.format(options.items()))

    print('Reading Config File: {}...'.format(config_file))
    print(config.items('data'))
    print(config.items('models'))
    print(config.items('training'))

    print('preprocessing dataset...')
    data = load_mat_file(config.get('data', 'images'))
    data_audio = load_mat_file(config.get('data', 'audio'))
    ae_pretrained = config.get('models', 'pretrained')
    ae_diff_pretrained = config.get('models', 'pretrained_diff')
    fusiontype = config.get('models', 'fusiontype')
    lstm_size = config.getint('models', 'lstm_size')
    output_classes = config.getint('models', 'output_classes')
    nonlinearity = options['nonlinearity'] if 'nonlinearity' in options else config.get('models', 'nonlinearity')

    # map the configured name onto the activation function
    if nonlinearity == 'sigmoid':
        nonlinearity = sigmoid
    if nonlinearity == 'rectify':
        nonlinearity = rectify

    # capture training parameters (CLI options take precedence over config)
    validation_window = int(options['validation_window']) \
        if 'validation_window' in options else config.getint('training', 'validation_window')
    num_epoch = int(options['num_epoch']) if 'num_epoch' in options else config.getint('training', 'num_epoch')
    weight_init = options['weight_init'] if 'weight_init' in options else config.get('training', 'weight_init')
    # cast like the other numeric overrides: the value is later formatted
    # with '{:.4f}' and handed to the optimizer, both of which need a number
    learning_rate = float(options['learning_rate']) if 'learning_rate' in options \
        else config.getfloat('training', 'learning_rate')
    use_peepholes = options['use_peepholes'] if 'use_peepholes' in options else config.getboolean('training',
                                                                                                  'use_peepholes')
    input_dimension = config.getint('models', 'input_dimension')
    input_dimension2 = config.getint('models', 'input_dimension2')

    use_blstm = config.getboolean('training', 'use_blstm')
    use_finetuning = config.getboolean('training', 'use_finetuning')

    # choose the weight initializer; unknown names keep the Glorot default
    weight_init_fn = las.init.GlorotUniform()
    if weight_init == 'glorot':
        weight_init_fn = las.init.GlorotUniform()
    if weight_init == 'norm':
        weight_init_fn = las.init.Normal(0.1)
    if weight_init == 'uniform':
        weight_init_fn = las.init.Uniform()
    if weight_init == 'ortho':
        weight_init_fn = las.init.Orthogonal()

    train_vidlens = data['trVideoLengthVec'].astype('int').reshape((-1,))
    val_vidlens = data['valVideoLengthVec'].astype('int').reshape((-1,))
    test_vidlens = data['testVideoLengthVec'].astype('int').reshape((-1,))
    train_X = data['trData'].astype('float32')
    val_X = data['valData'].astype('float32')
    test_X = data['testData'].astype('float32')
    train_X_audio = data_audio['trData'].astype('float32')
    val_X_audio = data_audio['valData'].astype('float32')
    test_X_audio = data_audio['testData'].astype('float32')
    # +1 to handle the -1 introduced in lstm_gendata
    train_y = data['trTargetsVec'].astype('int').reshape((-1,)) + 1
    val_y = data['valTargetsVec'].astype('int').reshape((-1,)) + 1
    test_y = data['testTargetsVec'].astype('int').reshape((-1,)) + 1

    train_X = reorder_data(train_X, (30, 50))
    val_X = reorder_data(val_X, (30, 50))
    test_X = reorder_data(test_X, (30, 50))

    visual_weights, visual_biases = load_dbn(ae_pretrained)
    audio_weights, audio_biases = load_dbn(ae_diff_pretrained)

    # IMPT: the encoder was trained with fortan ordered images, so to visualize
    # convert all the images to C order using reshape_images_order()
    # output = dbn.predict(test_X)
    # test_X = reshape_images_order(test_X, (26, 44))
    # output = reshape_images_order(output, (26, 44))
    # visualize_reconstruction(test_X[:36, :], output[:36, :], shape=(26, 44))

    # symbolic inputs of the compiled functions; targets are a matrix
    # because the labels are repeated along the sequence axis (see below)
    window = T.iscalar('theta')
    visual_input = T.tensor3('visual_input', dtype='float32')
    audio_input = T.tensor3('audio_input', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.imatrix('targets')

    print('constructing end to end model...')
    visual_net = avnet.create_pretrained_substream(visual_weights, visual_biases,
                                                   (None, None, input_dimension), visual_input,
                                                   (None, None), mask, 'visual',
                                                   lstm_size, window, nonlinearity, weight_init_fn, use_peepholes)

    audio_net = avnet.create_pretrained_substream(audio_weights, audio_biases,
                                                  (None, None, input_dimension2), audio_input,
                                                  (None, None), mask, 'audio',
                                                  lstm_size, window, nonlinearity, weight_init_fn, use_peepholes)
    network, l_fuse = avnet.create_model([visual_net, audio_net], (None, None), mask, lstm_size, output_classes,
                                         fusiontype, weight_init_fn, use_peepholes)

    print_network(network)
    # draw_to_file(las.layers.get_all_layers(network), 'network.png')
    print('compiling model...')
    # training graph (deterministic=False)
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = temporal_softmax_loss(predictions, targets, mask)
    updates = adam(cost, all_params, learning_rate=learning_rate)

    train = theano.function(
        [visual_input, targets, mask, audio_input, window],
        cost, updates=updates, allow_input_downcast=True)
    # same cost expression, but compiled without the parameter updates
    compute_train_cost = theano.function([visual_input, targets, mask, audio_input, window],
                                         cost, allow_input_downcast=True)

    # evaluation graph (deterministic=True)
    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = temporal_softmax_loss(test_predictions, targets, mask)
    compute_test_cost = theano.function(
        [visual_input, targets, mask, audio_input, window], test_cost, allow_input_downcast=True)

    val_fn = theano.function([visual_input, mask, audio_input, window], test_predictions, allow_input_downcast=True)

    # Train for at most num_epoch epochs of EPOCH_SIZE minibatches each
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    EPOCH_SIZE = 90
    BATCH_SIZE = 10
    WINDOW_SIZE = 9
    STRIP_SIZE = 3
    # most recent validation costs, consulted by early_stop2()
    val_window = circular_list(validation_window)
    # training costs of the last STRIP_SIZE epochs (zero until filled)
    train_strip = np.zeros((STRIP_SIZE,))
    best_val = float('inf')
    best_tr = float('inf')
    best_cr = 0.0

    datagen = gen_lstm_batch_random(train_X, train_y, train_vidlens, batchsize=BATCH_SIZE)
    integral_lens = compute_integral_len(train_vidlens)

    val_datagen = gen_lstm_batch_random(val_X, val_y, val_vidlens, batchsize=len(val_vidlens))
    test_datagen = gen_lstm_batch_random(test_X, test_y, test_vidlens, batchsize=len(test_vidlens))

    # We'll use this "validation set" to periodically check progress
    X_val, y_val, mask_val, idxs_val = next(val_datagen)
    integral_lens_val = compute_integral_len(val_vidlens)
    X_diff_val = gen_seq_batch_from_idx(val_X_audio, idxs_val, val_vidlens, integral_lens_val, np.max(val_vidlens))

    # we use the test set to check final classification rate
    X_test, y_test, mask_test, idxs_test = next(test_datagen)
    integral_lens_test = compute_integral_len(test_vidlens)
    X_diff_test = gen_seq_batch_from_idx(test_X_audio, idxs_test, test_vidlens, integral_lens_test, np.max(test_vidlens))

    # reshape the targets for validation: the cost function takes one label
    # per time step, the evaluation keeps the original one-per-video vector
    y_val_evaluate = y_val
    y_val = y_val.reshape((-1, 1)).repeat(mask_val.shape[-1], axis=-1)

    for epoch in range(num_epoch):
        time_start = time.time()
        for i in range(EPOCH_SIZE):
            X, y, m, batch_idxs = next(datagen)
            # repeat targets based on max sequence len
            y = y.reshape((-1, 1))
            y = y.repeat(m.shape[-1], axis=-1)
            X_diff = gen_seq_batch_from_idx(train_X_audio, batch_idxs,
                                            train_vidlens, integral_lens, np.max(train_vidlens))
            # progress line, overwritten in place via the trailing '\r'
            print_str = 'Epoch {} batch {}/{}: {} examples using adam with learning rate {:.4f}'.format(
                epoch + 1, i + 1, EPOCH_SIZE, len(X), learning_rate)
            print(print_str, end='')
            sys.stdout.flush()
            train(X, y, m, X_diff, WINDOW_SIZE)
            print('\r', end='')
        # the reported training cost is that of the last minibatch only
        cost = compute_train_cost(X, y, m, X_diff, WINDOW_SIZE)
        val_cost = compute_test_cost(X_val, y_val, mask_val, X_diff_val, WINDOW_SIZE)
        cost_train.append(cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = cost
        val_window.push(val_cost)

        # gl: relative increase of the current validation cost over the best
        # one so far; pk: how far the mean of the training strip lies above
        # its minimum; pq: their ratio.
        # NOTE(review): train_strip still contains zeros during the first
        # STRIP_SIZE - 1 epochs, so the division below is by zero there.
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        pk = 1000 * (np.sum(train_strip) / (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model2(X_val, y_val_evaluate, mask_val, X_diff_val, WINDOW_SIZE, val_fn)
        class_rate.append(cr)

        if val_cost < best_val:
            # new best epoch: record it and evaluate the test set
            best_val = val_cost
            best_tr = cost
            best_cr = cr
            if fusiontype == 'adasum':
                adascale_param = las.layers.get_all_param_values(l_fuse, scaling_param=True)
            test_cr, test_conf = evaluate_model2(X_test, y_test, mask_test,
                                                 X_diff_test, WINDOW_SIZE, val_fn)
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f}, Test CR= {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr, test_cr, time.time() - time_start))
        else:
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr, time.time() - time_start))

        if epoch >= validation_window and early_stop2(val_window, best_val, validation_window):
            break

    # class labels used for the confusion matrix axes
    numbers = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']

    print('Final Model')
    print('CR: {}, val loss: {}, Test CR: {}'.format(best_cr, best_val, test_cr))
    if fusiontype == 'adasum':
        print("final scaling params: {}".format(adascale_param))
    print('confusion matrix: ')
    plot_confusion_matrix(test_conf, numbers, fmt='latex')
    plot_validation_cost(cost_train, cost_val, savefilename='valid_cost')

    if 'write_results' in options:
        # append one summary row followed by the three per-epoch curves
        results_file = options['write_results']
        with open(results_file, mode='a') as f:
            f.write('{},{},{},{},{},{},{},{},{},{},{},{}\n'.format(use_finetuning, 'yes', use_peepholes,
                                                                   'adam', weight_init, 'RELU',
                                                                   use_blstm, learning_rate, best_tr,
                                                                   best_val, best_cr*100, test_cr*100))

            s = ','.join([str(v) for v in cost_train])
            f.write('{}\n'.format(s))

            s = ','.join([str(v) for v in cost_val])
            f.write('{}\n'.format(s))

            s = ','.join([str(v) for v in class_rate])
            f.write('{}\n'.format(s))

コード例 #31

ファイルを表示

def main():
    """Train and evaluate the end-to-end ``adenet_v3`` fusion network.

    Steps, in order:

    1. Read the config file named by the ``config`` CLI option.
    2. Load raw-image, DCT and diff-image features from .mat files and
       split them into a training part and a held-out part using the
       index produced by ``create_split_index``.
    3. Optionally fine-tune, save and/or load the image encoder, and load
       the diff-image encoder (both are required to build the network).
    4. Build the network, compile the Theano functions and train with
       adadelta, tracking train/validation cost and classification rate.
    5. Report the best model and, if the ``write_results`` option is set,
       append a ``fusiontype,best_cr,best_val`` line to that file.

    Raises:
        ValueError: if the config asks to save an encoder that was never
            fine-tuned, or if either encoder is unavailable when the
            network is about to be constructed.
    """
    configure_theano()
    options = parse_options()
    config_file = options['config']
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    print('CLI options: {}'.format(options.items()))

    print('Reading Config File: {}...'.format(config_file))
    print(config.items('data'))
    print(config.items('models'))
    print(config.items('training'))

    print('preprocessing dataset...')
    data = load_mat_file(config.get('data', 'images'))
    dct_data = load_mat_file(config.get('data', 'dct'))
    diff_data = load_mat_file(config.get('data', 'diff'))
    ae_pretrained = config.get('models', 'pretrained')
    ae_finetuned = config.get('models', 'finetuned')
    ae_finetuned_diff = config.get('models', 'finetuned_diff')
    fusiontype = config.get('models', 'fusiontype')
    learning_rate = float(config.get('training', 'learning_rate'))
    decay_rate = float(config.get('training', 'decay_rate'))
    decay_start = int(config.get('training', 'decay_start'))
    do_finetune = config.getboolean('training', 'do_finetune')
    save_finetune = config.getboolean('training', 'save_finetune')
    load_finetune = config.getboolean('training', 'load_finetune')
    load_finetune_diff = config.getboolean('training', 'load_finetune_diff')

    # create the necessary variable mappings
    data_matrix = data['dataMatrix']
    data_matrix_len = data_matrix.shape[0]
    targets_vec = data['targetsVec']
    vid_len_vec = data['videoLengthVec']
    iter_vec = data['iterVec']
    dct_feats = dct_data['dctFeatures']
    diff_data_matrix = diff_data['dataMatrix']

    # Disabled preprocessing alternatives, kept for reference:
    # samplewise normalize
    # data_matrix = normalize_input(data_matrix)
    # diff_data_matrix = normalize_input(diff_data_matrix)
    # diff_data_matrix = compute_diff_images(data_matrix, vid_len_vec.reshape((-1,))).astype('float32')
    # mean remove
    # dct_feats = dct_feats[:, 0:30]
    # dct_feats = sequencewise_mean_image_subtraction(dct_feats, vid_len_vec.reshape((-1,)))

    indexes = create_split_index(data_matrix_len, vid_len_vec, iter_vec)
    train_vidlen_vec, test_vidlen_vec = split_videolen(vid_len_vec, iter_vec)
    assert len(train_vidlen_vec) == 520
    assert len(test_vidlen_vec) == 260
    assert np.sum(vid_len_vec) == data_matrix_len

    # Build the two row selectors once instead of re-evaluating the
    # comparison for every array that is split.
    train_rows = indexes == True  # noqa: E712 (element-wise comparison)
    test_rows = indexes == False  # noqa: E712 (element-wise comparison)

    # split the data
    train_data = data_matrix[train_rows]
    train_targets = targets_vec[train_rows]
    train_targets = train_targets.reshape((len(train_targets), ))
    test_data = data_matrix[test_rows]
    test_targets = targets_vec[test_rows]
    test_targets = test_targets.reshape((len(test_targets), ))
    train_diff_data = diff_data_matrix[train_rows]
    test_diff_data = diff_data_matrix[test_rows]

    # split the dct features + featurewise mean normalize; the held-out
    # features are scaled with the statistics returned for the training set
    train_dct = dct_feats[train_rows].astype(np.float32)
    test_dct = dct_feats[test_rows].astype(np.float32)
    train_dct, dct_mean, dct_std = featurewise_normalize_sequence(train_dct)
    test_dct = (test_dct - dct_mean) / dct_std

    # Both encoders start out unset so that a config which never creates
    # them produces a clear error instead of a NameError further down.
    ae = None
    diff_ae = None

    if do_finetune:
        print('fine-tuning...')
        ae = load_dbn(ae_pretrained)
        ae.initialize()
        ae.fit(train_data, train_data)
        res = ae.predict(test_data)
        # print(res.shape)
        visualize_reconstruction(test_data[300:336], res[300:336])

    if save_finetune:
        if ae is None:
            raise ValueError(
                'save_finetune is set but no encoder was fine-tuned; '
                'enable do_finetune as well')
        with open(ae_finetuned, 'wb') as model_file:
            pickle.dump(ae, model_file)

    # NOTE: pickle.load can execute arbitrary code contained in the file;
    # only point the config at model files from a trusted source.
    if load_finetune:
        print('loading pre-trained encoding layers...')
        with open(ae_finetuned, 'rb') as model_file:
            ae = pickle.load(model_file)
        ae.initialize()

    if load_finetune_diff:
        print('loading pre-trained diff image encoding layers...')
        with open(ae_finetuned_diff, 'rb') as model_file:
            diff_ae = pickle.load(model_file)
        diff_ae.initialize()

    if ae is None or diff_ae is None:
        raise ValueError(
            'both encoders are required to build the network: enable '
            'do_finetune or load_finetune, and load_finetune_diff')

    # Hard-coded switch: when enabled, the raw-image input variable is
    # taken from a pre-trained convolutional encoder instead.
    load_convae = False
    if load_convae:
        print('loading pre-trained convolutional autoencoder...')
        encoder = load_model('models/conv_encoder_norm.dat')
        inputs_raw = las.layers.get_all_layers(encoder)[0].input_var
    else:
        inputs_raw = T.tensor3('inputs_raw', dtype='float32')
    # The diff-image input is needed by the network in either case.
    inputs_diff = T.tensor3('inputs_diff', dtype='float32')

    window = T.iscalar('theta')
    dct = T.tensor3('dct', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.ivector('targets')
    lr = theano.shared(np.array(learning_rate, dtype=theano.config.floatX),
                       name='learning_rate')
    lr_decay = np.array(decay_rate, dtype=theano.config.floatX)

    print('constructing end to end model...')
    network, l_fuse = adenet_v3.create_model(ae, diff_ae, (None, None, 1200),
                                             inputs_raw, (None, None), mask,
                                             (None, None, 90), dct,
                                             (None, None, 1200), inputs_diff,
                                             250, window, 26, fusiontype)

    print_network(network)
    draw_to_file(las.layers.get_all_layers(network), 'adenet_v3.png')
    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = T.mean(las.objectives.categorical_crossentropy(
        predictions, targets))
    updates = las.updates.adadelta(cost, all_params, learning_rate=lr)
    # updates = las.updates.adam(cost, all_params, learning_rate=lr)

    # Hard-coded switch for an optional max-norm constraint on the weights.
    use_max_constraint = False
    if use_max_constraint:
        MAX_NORM = 4
        for param in las.layers.get_all_params(network, regularizable=True):
            if param.ndim > 1:  # only apply to dimensions larger than 1, exclude biases
                updates[param] = norm_constraint(
                    param,
                    MAX_NORM *
                    las.utils.compute_norms(param.get_value()).mean())

    # `train` applies the parameter updates; `compute_train_cost` evaluates
    # the same (non-deterministic) cost expression without updating.
    train = theano.function(
        [inputs_raw, targets, mask, dct, inputs_diff, window],
        cost,
        updates=updates,
        allow_input_downcast=True)
    compute_train_cost = theano.function(
        [inputs_raw, targets, mask, dct, inputs_diff, window],
        cost,
        allow_input_downcast=True)

    # Evaluation graph: same network, deterministic forward pass.
    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = T.mean(
        las.objectives.categorical_crossentropy(test_predictions, targets))
    compute_test_cost = theano.function(
        [inputs_raw, targets, mask, dct, inputs_diff, window],
        test_cost,
        allow_input_downcast=True)

    val_fn = theano.function([inputs_raw, mask, dct, inputs_diff, window],
                             test_predictions,
                             allow_input_downcast=True)

    # Train for at most NUM_EPOCHS epochs of EPOCH_SIZE minibatches each.
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    NUM_EPOCHS = 25
    EPOCH_SIZE = 20
    BATCH_SIZE = 26
    WINDOW_SIZE = 9
    STRIP_SIZE = 3
    VALIDATION_WINDOW = 4
    val_window = circular_list(VALIDATION_WINDOW)
    train_strip = np.zeros((STRIP_SIZE, ))
    best_val = float('inf')
    best_conf = None
    best_cr = 0.0
    # Only filled in for the 'adasum' fusion type, when validation improves.
    adascale_param = None

    datagen = gen_lstm_batch_random(train_data,
                                    train_targets,
                                    train_vidlen_vec,
                                    batchsize=BATCH_SIZE)
    val_datagen = gen_lstm_batch_random(test_data,
                                        test_targets,
                                        test_vidlen_vec,
                                        batchsize=len(test_vidlen_vec))
    integral_lens = compute_integral_len(train_vidlen_vec)

    # Loop-invariant sequence lengths, computed once.
    max_train_len = np.max(train_vidlen_vec)
    max_test_len = np.max(test_vidlen_vec)

    # We'll use this "validation set" (one batch holding every held-out
    # video) to periodically check progress
    X_val, y_val, mask_val, idxs_val = next(val_datagen)
    integral_lens_val = compute_integral_len(test_vidlen_vec)
    dct_val = gen_seq_batch_from_idx(test_dct, idxs_val,
                                     test_vidlen_vec, integral_lens_val,
                                     max_test_len)
    diff_val = gen_seq_batch_from_idx(test_diff_data, idxs_val,
                                      test_vidlen_vec, integral_lens_val,
                                      max_test_len)

    def early_stop(cost_window):
        """Return True when every cost in the window exceeds the previous one.

        Windows with fewer than two entries never trigger a stop. Costs are
        compared in the order the window yields them when iterated
        (presumably oldest to newest -- confirm against circular_list).
        """
        if len(cost_window) < 2:
            return False
        else:
            curr = cost_window[0]
            for idx, cost in enumerate(cost_window):
                if curr < cost or idx == 0:
                    curr = cost
                else:
                    return False
            return True

    for epoch in range(NUM_EPOCHS):
        time_start = time.time()
        for i in range(EPOCH_SIZE):
            X, y, m, batch_idxs = next(datagen)
            d = gen_seq_batch_from_idx(train_dct, batch_idxs, train_vidlen_vec,
                                       integral_lens, max_train_len)
            diff = gen_seq_batch_from_idx(train_diff_data, batch_idxs,
                                          train_vidlen_vec, integral_lens,
                                          max_train_len)
            print_str = 'Epoch {} batch {}/{}: {} examples at learning rate = {:.4f}'.format(
                epoch + 1, i + 1, EPOCH_SIZE, len(X), float(lr.get_value()))
            print(print_str, end='')
            sys.stdout.flush()
            train(X, y, m, d, diff, WINDOW_SIZE)
            print('\r', end='')
        # The epoch's training cost is measured on its last minibatch only.
        cost = compute_train_cost(X, y, m, d, diff, WINDOW_SIZE)
        val_cost = compute_test_cost(X_val, y_val, mask_val, dct_val, diff_val,
                                     WINDOW_SIZE)
        cost_train.append(cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = cost
        val_window.push(val_cost)

        # gl: percentage by which the current validation cost exceeds the
        # lowest one seen so far. pk: how far the mean of the training strip
        # lies above the strip minimum. The strip starts out as zeros, so
        # pk (and therefore pq) is not meaningful until it has been filled.
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        pk = 1000 * (np.sum(train_strip) /
                     (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model(X_val, y_val, mask_val, dct_val,
                                      diff_val, WINDOW_SIZE, val_fn)
        class_rate.append(cr)

        print(
            "Epoch {} train cost = {}, validation cost = {}, "
            "generalization loss = {:.3f}, GQ = {:.3f}, classification rate = {:.3f} ({:.1f}sec)"
            .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                    time.time() - time_start))

        # Remember the statistics of the epoch with the lowest validation cost.
        if val_cost < best_val:
            best_val = val_cost
            best_conf = val_conf
            best_cr = cr
            if fusiontype == 'adasum':
                adascale_param = las.layers.get_all_param_values(
                    l_fuse, scaling_param=True)

        if epoch >= VALIDATION_WINDOW and early_stop(val_window):
            break

        # learning rate decay
        if epoch >= decay_start - 1:
            lr.set_value(lr.get_value() * lr_decay)

    letters = [
        'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
        'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'
    ]

    print('Best Model')
    print('classification rate: {}, validation loss: {}'.format(
        best_cr, best_val))
    if fusiontype == 'adasum':
        print("final scaling params: {}".format(adascale_param))
    print('confusion matrix: ')
    plot_confusion_matrix(best_conf, letters, fmt='latex')
    plot_validation_cost(cost_train, cost_val, class_rate, 'e2e_valid_cost')

    # .get() keeps the script from failing with a KeyError when the
    # write_results option was not supplied at all.
    results_file = options.get('write_results')
    if results_file:
        with open(results_file, mode='a') as f:
            f.write('{},{},{}\n'.format(fusiontype, best_cr, best_val))

コード例 #32

ファイルを表示

ファイル: separate_train.py プロジェクト: lzuwei/ip-avsr

def main():
    """Train and evaluate the baseline LSTM classifier on encoder features.

    Steps, in order:

    1. Read ``config/separate_train.ini`` and load the image data.
    2. Split the frames into train/validation/test parts by the subject
       ids listed in ``data/train.txt``, ``data/val.txt`` and
       ``data/test.txt``.
    3. Normalize the inputs, optionally fine-tune/save/load the DBN, and
       replace each split by the output of the extracted encoder.
    4. Train the LSTM classifier with adadelta, evaluating on the test
       split whenever the validation cost improves.
    5. Print the final statistics and plot the confusion matrix and the
       cost curves.

    Raises:
        ValueError: if the config asks to save a DBN that was never
            fine-tuned, or if no DBN is available for feature extraction.
    """
    configure_theano()
    config_file = 'config/separate_train.ini'
    print('loading config file: {}'.format(config_file))
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    print('preprocessing dataset...')
    data = load_mat_file(config.get('data', 'images'))
    ae_pretrained = config.get('models', 'pretrained')
    ae_finetuned = config.get('models', 'finetuned')
    learning_rate = float(config.get('training', 'learning_rate'))
    decay_rate = float(config.get('training', 'decay_rate'))
    decay_start = int(config.get('training', 'decay_start'))
    lstm_units = int(config.get('training', 'lstm_units'))
    output_units = int(config.get('training', 'output_units'))
    do_finetune = config.getboolean('training', 'do_finetune')
    save_finetune = config.getboolean('training', 'save_finetune')
    load_finetune = config.getboolean('training', 'load_finetune')

    # 53 subjects, 70 utterances, 5 view angles
    # s[x]_v[y]_u[z].mp4
    # resized, height, width = (26, 44)
    # ['dataMatrix', 'targetH', 'targetsPerVideoVec', 'videoLengthVec', '__header__', 'targetsVec',
    # '__globals__', 'iterVec', 'filenamesVec', 'dataMatrixCells', 'subjectsVec', 'targetW', '__version__']

    print(data.keys())
    X = data['dataMatrix'].astype('float32')  # .reshape((-1, 26, 44), order='f').reshape((-1, 26 * 44))
    y = data['targetsVec'].astype('int32')
    y = y.reshape((len(y),))
    uniques = np.unique(y)
    print('number of classifications: {}'.format(len(uniques)))
    subjects = data['subjectsVec'].astype('int')
    subjects = subjects.reshape((len(subjects),))
    video_lens = data['videoLengthVec'].astype('int')
    video_lens = video_lens.reshape((len(video_lens),))

    train_subject_ids = read_data_split_file('data/train.txt')
    val_subject_ids = read_data_split_file('data/val.txt')
    test_subject_ids = read_data_split_file('data/test.txt')
    print('Train: {}'.format(train_subject_ids))
    print('Validation: {}'.format(val_subject_ids))
    print('Test: {}'.format(test_subject_ids))

    train_X, train_y, train_vidlens, train_subjects, \
    val_X, val_y, val_vidlens, val_subjects, \
    test_X, test_y, test_vidlens, test_subjects = \
        split_data(X, y, subjects, video_lens, train_subject_ids, val_subject_ids, test_subject_ids)

    # Sanity check: the three splits together account for every row.
    assert train_X.shape[0] + val_X.shape[0] + test_X.shape[0] == len(X)
    assert train_y.shape[0] + val_y.shape[0] + test_y.shape[0] == len(y)
    assert train_vidlens.shape[0] + val_vidlens.shape[0] + test_vidlens.shape[0] == len(video_lens)
    assert train_subjects.shape[0] + val_subjects.shape[0] + test_subjects.shape[0] == len(subjects)

    # Every split goes through the same input normalization before it is
    # encoded. The validation split used to be skipped here, so the
    # validation cost that drives model selection and early stopping was
    # computed on differently scaled inputs.
    train_X = normalize_input(train_X, centralize=True)
    val_X = normalize_input(val_X, centralize=True)
    test_X = normalize_input(test_X, centralize=True)

    # Starts out unset so that a config which never creates the DBN
    # produces a clear error instead of a NameError further down.
    dbn = None

    if do_finetune:
        dbn = load_dbn(ae_pretrained)
        dbn.initialize()
        dbn.fit(train_X, train_X)
        recon = dbn.predict(test_X)
        visualize_reconstruction(reorder_data(test_X[800:864], (26, 44)),
                                 reorder_data(recon[800:864], (26, 44)),
                                 shape=(26, 44))

    if save_finetune:
        if dbn is None:
            raise ValueError(
                'save_finetune is set but no DBN was fine-tuned; '
                'enable do_finetune as well')
        with open(ae_finetuned, 'wb') as model_file:
            pickle.dump(dbn, model_file)

    # NOTE: pickle.load can execute arbitrary code contained in the file;
    # only point the config at model files from a trusted source.
    if load_finetune:
        print('loading pre-trained encoding layers...')
        with open(ae_finetuned, 'rb') as model_file:
            dbn = pickle.load(model_file)
        dbn.initialize()

    if dbn is None:
        raise ValueError(
            'no DBN available for feature extraction: enable do_finetune '
            'or load_finetune in the config')

    # Replace the raw frames of every split by the encoder's output.
    encoder = extract_encoder(dbn)
    train_X = encoder.predict(train_X)
    val_X = encoder.predict(val_X)
    test_X = encoder.predict(test_X)

    # train_X = concat_first_second_deltas(train_X, train_vidlens)
    # val_X = concat_first_second_deltas(val_X, val_vidlens)
    # test_X = concat_first_second_deltas(test_X, test_vidlens)

    # featurewise normalize; validation and test features are scaled with
    # the statistics returned for the training features
    train_X, mean, std = featurewise_normalize_sequence(train_X)
    val_X = (val_X - mean) / std
    test_X = (test_X - mean) / std

    # IMPT: the encoder was trained with fortran ordered images, so to visualize
    # convert all the images to C order using reshape_images_order()

    inputs = T.tensor3('inputs', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.ivector('targets')
    lr = theano.shared(np.array(learning_rate, dtype=theano.config.floatX), name='learning_rate')
    lr_decay = np.array(decay_rate, dtype=theano.config.floatX)

    print('constructing lstm classifier...')
    network = lstm_classifier_baseline.create_model((None, None, 50), inputs,
                                                    (None, None), mask,
                                                    lstm_units, output_units)

    print_network(network)
    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = T.mean(las.objectives.categorical_crossentropy(predictions, targets))
    updates = adadelta(cost, all_params, learning_rate=lr)
    # updates = las.updates.apply_momentum(sgd(cost, all_params, learning_rate=lr), all_params, 0.1)

    # Hard-coded switch for an optional max-norm constraint on the weights.
    use_max_constraint = False
    if use_max_constraint:
        MAX_NORM = 4
        for param in las.layers.get_all_params(network, regularizable=True):
            if param.ndim > 1:  # only apply to dimensions larger than 1, exclude biases
                updates[param] = norm_constraint(param, MAX_NORM * las.utils.compute_norms(param.get_value()).mean())

    # `train` applies the parameter updates; `compute_train_cost` evaluates
    # the same (non-deterministic) cost expression without updating.
    train = theano.function(
        [inputs, targets, mask],
        cost, updates=updates, allow_input_downcast=True)
    compute_train_cost = theano.function([inputs, targets, mask], cost, allow_input_downcast=True)

    # Evaluation graph: same network, deterministic forward pass.
    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = T.mean(las.objectives.categorical_crossentropy(test_predictions, targets))
    compute_test_cost = theano.function(
        [inputs, targets, mask], test_cost, allow_input_downcast=True)

    val_fn = theano.function([inputs, mask], test_predictions, allow_input_downcast=True)

    # Train for at most NUM_EPOCHS epochs of EPOCH_SIZE minibatches each.
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    NUM_EPOCHS = 30
    EPOCH_SIZE = 120
    BATCH_SIZE = 10
    STRIP_SIZE = 3
    VALIDATION_WINDOW = 10
    val_window = circular_list(VALIDATION_WINDOW)
    train_strip = np.zeros((STRIP_SIZE,))
    best_val = float('inf')
    best_conf = None
    best_cr = 0.0
    # Test-set results of the best model; they stay None if the validation
    # cost never improves (e.g. when it is NaN from the first epoch on).
    test_cr = None
    test_conf = None

    datagen = gen_lstm_batch_random(train_X, train_y, train_vidlens, batchsize=BATCH_SIZE)
    val_datagen = gen_lstm_batch_random(val_X, val_y, val_vidlens, batchsize=len(val_vidlens))
    test_datagen = gen_lstm_batch_random(test_X, test_y, test_vidlens, batchsize=len(test_vidlens))

    # One batch each, sized to hold every validation / test video.
    X_val, y_val, mask_val, _ = next(val_datagen)
    X_test, y_test, mask_test, _ = next(test_datagen)

    def early_stop(cost_window):
        """Return True when every cost in the window exceeds the previous one.

        Windows with fewer than two entries never trigger a stop. Costs are
        compared in the order the window yields them when iterated
        (presumably oldest to newest -- confirm against circular_list).
        """
        if len(cost_window) < 2:
            return False
        else:
            curr = cost_window[0]
            for idx, cost in enumerate(cost_window):
                if curr < cost or idx == 0:
                    curr = cost
                else:
                    return False
            return True

    for epoch in range(NUM_EPOCHS):
        time_start = time.time()
        for i in range(EPOCH_SIZE):
            X, y, m, _ = next(datagen)
            print_str = 'Epoch {} batch {}/{}: {} examples at learning rate = {:.4f}'.format(
                epoch + 1, i + 1, EPOCH_SIZE, len(X), float(lr.get_value()))
            print(print_str, end='')
            sys.stdout.flush()
            train(X, y, m)
            print('\r', end='')
        # The epoch's training cost is measured on its last minibatch only.
        cost = compute_train_cost(X, y, m)
        val_cost = compute_test_cost(X_val, y_val, mask_val)
        cost_train.append(cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = cost
        val_window.push(val_cost)

        # gl: percentage by which the current validation cost exceeds the
        # lowest one seen so far. pk: how far the mean of the training strip
        # lies above the strip minimum. The strip starts out as zeros, so
        # pk (and therefore pq) is not meaningful until it has been filled.
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        pk = 1000 * (np.sum(train_strip) / (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model(X_val, y_val, mask_val, val_fn)
        class_rate.append(cr)

        if val_cost < best_val:
            # New best validation cost: record it and score the test split.
            best_val = val_cost
            best_conf = val_conf
            best_cr = cr
            test_cr, test_conf = evaluate_model(X_test, y_test, mask_test, val_fn)
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f}, Test CR= {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr, test_cr, time.time() - time_start))
        else:
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr, time.time() - time_start))

        if epoch >= VALIDATION_WINDOW and early_stop(val_window):
            break

        # learning rate decay
        if epoch > decay_start:
            lr.set_value(lr.get_value() * lr_decay)

    phrases = ['p1', 'p2', 'p3', 'p4', 'p5', 'p6', 'p7', 'p8', 'p9', 'p10']

    print('Final Model')
    print('CR: {}, val loss: {}, Test CR: {}'.format(best_cr, best_val, test_cr))
    print('confusion matrix: ')
    if test_conf is not None:
        plot_confusion_matrix(test_conf, phrases, fmt='grid')
    plot_validation_cost(cost_train, cost_val, savefilename='valid_cost')

コード例 #33

ファイルを表示

ファイル: separate_train.py プロジェクト: konatasick/ip-avsr

def main():
    """Train and evaluate the baseline LSTM classifier on encoder features.

    Steps, in order:

    1. Read ``config/separate_train.ini`` and load the image data.
    2. Split the frames into train/validation/test parts by the subject
       ids listed in ``data/train.txt``, ``data/val.txt`` and
       ``data/test.txt``.
    3. Normalize the inputs, optionally fine-tune/save/load the DBN, and
       replace each split by the output of the extracted encoder.
    4. Train the LSTM classifier with adadelta, evaluating on the test
       split whenever the validation cost improves.
    5. Print the final statistics and plot the confusion matrix and the
       cost curves.

    Raises:
        ValueError: if the config asks to save a DBN that was never
            fine-tuned, or if no DBN is available for feature extraction.
    """
    configure_theano()
    config_file = 'config/separate_train.ini'
    print('loading config file: {}'.format(config_file))
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    print('preprocessing dataset...')
    data = load_mat_file(config.get('data', 'images'))
    ae_pretrained = config.get('models', 'pretrained')
    ae_finetuned = config.get('models', 'finetuned')
    learning_rate = float(config.get('training', 'learning_rate'))
    decay_rate = float(config.get('training', 'decay_rate'))
    decay_start = int(config.get('training', 'decay_start'))
    lstm_units = int(config.get('training', 'lstm_units'))
    output_units = int(config.get('training', 'output_units'))
    do_finetune = config.getboolean('training', 'do_finetune')
    save_finetune = config.getboolean('training', 'save_finetune')
    load_finetune = config.getboolean('training', 'load_finetune')

    # 53 subjects, 70 utterances, 5 view angles
    # s[x]_v[y]_u[z].mp4
    # resized, height, width = (26, 44)
    # ['dataMatrix', 'targetH', 'targetsPerVideoVec', 'videoLengthVec', '__header__', 'targetsVec',
    # '__globals__', 'iterVec', 'filenamesVec', 'dataMatrixCells', 'subjectsVec', 'targetW', '__version__']

    print(data.keys())
    X = data['dataMatrix'].astype(
        'float32')  # .reshape((-1, 26, 44), order='f').reshape((-1, 26 * 44))
    y = data['targetsVec'].astype('int32')
    y = y.reshape((len(y), ))
    uniques = np.unique(y)
    print('number of classifications: {}'.format(len(uniques)))
    subjects = data['subjectsVec'].astype('int')
    subjects = subjects.reshape((len(subjects), ))
    video_lens = data['videoLengthVec'].astype('int')
    video_lens = video_lens.reshape((len(video_lens), ))

    train_subject_ids = read_data_split_file('data/train.txt')
    val_subject_ids = read_data_split_file('data/val.txt')
    test_subject_ids = read_data_split_file('data/test.txt')
    print('Train: {}'.format(train_subject_ids))
    print('Validation: {}'.format(val_subject_ids))
    print('Test: {}'.format(test_subject_ids))

    train_X, train_y, train_vidlens, train_subjects, \
    val_X, val_y, val_vidlens, val_subjects, \
    test_X, test_y, test_vidlens, test_subjects = \
        split_data(X, y, subjects, video_lens, train_subject_ids, val_subject_ids, test_subject_ids)

    # Sanity check: the three splits together account for every row.
    assert train_X.shape[0] + val_X.shape[0] + test_X.shape[0] == len(X)
    assert train_y.shape[0] + val_y.shape[0] + test_y.shape[0] == len(y)
    assert train_vidlens.shape[0] + val_vidlens.shape[0] + test_vidlens.shape[
        0] == len(video_lens)
    assert train_subjects.shape[0] + val_subjects.shape[
        0] + test_subjects.shape[0] == len(subjects)

    # Every split goes through the same input normalization before it is
    # encoded. The validation split used to be skipped here, so the
    # validation cost that drives model selection and early stopping was
    # computed on differently scaled inputs.
    train_X = normalize_input(train_X, centralize=True)
    val_X = normalize_input(val_X, centralize=True)
    test_X = normalize_input(test_X, centralize=True)

    # Starts out unset so that a config which never creates the DBN
    # produces a clear error instead of a NameError further down.
    dbn = None

    if do_finetune:
        dbn = load_dbn(ae_pretrained)
        dbn.initialize()
        dbn.fit(train_X, train_X)
        recon = dbn.predict(test_X)
        visualize_reconstruction(reorder_data(test_X[800:864], (26, 44)),
                                 reorder_data(recon[800:864], (26, 44)),
                                 shape=(26, 44))

    if save_finetune:
        if dbn is None:
            raise ValueError(
                'save_finetune is set but no DBN was fine-tuned; '
                'enable do_finetune as well')
        with open(ae_finetuned, 'wb') as model_file:
            pickle.dump(dbn, model_file)

    # NOTE: pickle.load can execute arbitrary code contained in the file;
    # only point the config at model files from a trusted source.
    if load_finetune:
        print('loading pre-trained encoding layers...')
        with open(ae_finetuned, 'rb') as model_file:
            dbn = pickle.load(model_file)
        dbn.initialize()

    if dbn is None:
        raise ValueError(
            'no DBN available for feature extraction: enable do_finetune '
            'or load_finetune in the config')

    # Replace the raw frames of every split by the encoder's output.
    encoder = extract_encoder(dbn)
    train_X = encoder.predict(train_X)
    val_X = encoder.predict(val_X)
    test_X = encoder.predict(test_X)

    # train_X = concat_first_second_deltas(train_X, train_vidlens)
    # val_X = concat_first_second_deltas(val_X, val_vidlens)
    # test_X = concat_first_second_deltas(test_X, test_vidlens)

    # featurewise normalize; validation and test features are scaled with
    # the statistics returned for the training features
    train_X, mean, std = featurewise_normalize_sequence(train_X)
    val_X = (val_X - mean) / std
    test_X = (test_X - mean) / std

    # IMPT: the encoder was trained with fortran ordered images, so to visualize
    # convert all the images to C order using reshape_images_order()

    inputs = T.tensor3('inputs', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.ivector('targets')
    lr = theano.shared(np.array(learning_rate, dtype=theano.config.floatX),
                       name='learning_rate')
    lr_decay = np.array(decay_rate, dtype=theano.config.floatX)

    print('constructing lstm classifier...')
    network = lstm_classifier_baseline.create_model(
        (None, None, 50), inputs, (None, None), mask, lstm_units, output_units)

    print_network(network)
    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = T.mean(las.objectives.categorical_crossentropy(
        predictions, targets))
    updates = adadelta(cost, all_params, learning_rate=lr)
    # updates = las.updates.apply_momentum(sgd(cost, all_params, learning_rate=lr), all_params, 0.1)

    # Hard-coded switch for an optional max-norm constraint on the weights.
    use_max_constraint = False
    if use_max_constraint:
        MAX_NORM = 4
        for param in las.layers.get_all_params(network, regularizable=True):
            if param.ndim > 1:  # only apply to dimensions larger than 1, exclude biases
                updates[param] = norm_constraint(
                    param,
                    MAX_NORM *
                    las.utils.compute_norms(param.get_value()).mean())

    # `train` applies the parameter updates; `compute_train_cost` evaluates
    # the same (non-deterministic) cost expression without updating.
    train = theano.function([inputs, targets, mask],
                            cost,
                            updates=updates,
                            allow_input_downcast=True)
    compute_train_cost = theano.function([inputs, targets, mask],
                                         cost,
                                         allow_input_downcast=True)

    # Evaluation graph: same network, deterministic forward pass.
    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = T.mean(
        las.objectives.categorical_crossentropy(test_predictions, targets))
    compute_test_cost = theano.function([inputs, targets, mask],
                                        test_cost,
                                        allow_input_downcast=True)

    val_fn = theano.function([inputs, mask],
                             test_predictions,
                             allow_input_downcast=True)

    # Train for at most NUM_EPOCHS epochs of EPOCH_SIZE minibatches each.
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    NUM_EPOCHS = 30
    EPOCH_SIZE = 120
    BATCH_SIZE = 10
    STRIP_SIZE = 3
    VALIDATION_WINDOW = 10
    val_window = circular_list(VALIDATION_WINDOW)
    train_strip = np.zeros((STRIP_SIZE, ))
    best_val = float('inf')
    best_conf = None
    best_cr = 0.0
    # Test-set results of the best model; they stay None if the validation
    # cost never improves (e.g. when it is NaN from the first epoch on).
    test_cr = None
    test_conf = None

    datagen = gen_lstm_batch_random(train_X,
                                    train_y,
                                    train_vidlens,
                                    batchsize=BATCH_SIZE)
    val_datagen = gen_lstm_batch_random(val_X,
                                        val_y,
                                        val_vidlens,
                                        batchsize=len(val_vidlens))
    test_datagen = gen_lstm_batch_random(test_X,
                                         test_y,
                                         test_vidlens,
                                         batchsize=len(test_vidlens))

    # One batch each, sized to hold every validation / test video.
    X_val, y_val, mask_val, _ = next(val_datagen)
    X_test, y_test, mask_test, _ = next(test_datagen)

    def early_stop(cost_window):
        """Return True when every cost in the window exceeds the previous one.

        Windows with fewer than two entries never trigger a stop. Costs are
        compared in the order the window yields them when iterated
        (presumably oldest to newest -- confirm against circular_list).
        """
        if len(cost_window) < 2:
            return False
        else:
            curr = cost_window[0]
            for idx, cost in enumerate(cost_window):
                if curr < cost or idx == 0:
                    curr = cost
                else:
                    return False
            return True

    for epoch in range(NUM_EPOCHS):
        time_start = time.time()
        for i in range(EPOCH_SIZE):
            X, y, m, _ = next(datagen)
            print_str = 'Epoch {} batch {}/{}: {} examples at learning rate = {:.4f}'.format(
                epoch + 1, i + 1, EPOCH_SIZE, len(X), float(lr.get_value()))
            print(print_str, end='')
            sys.stdout.flush()
            train(X, y, m)
            print('\r', end='')
        # The epoch's training cost is measured on its last minibatch only.
        cost = compute_train_cost(X, y, m)
        val_cost = compute_test_cost(X_val, y_val, mask_val)
        cost_train.append(cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = cost
        val_window.push(val_cost)

        # gl: percentage by which the current validation cost exceeds the
        # lowest one seen so far. pk: how far the mean of the training strip
        # lies above the strip minimum. The strip starts out as zeros, so
        # pk (and therefore pq) is not meaningful until it has been filled.
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        pk = 1000 * (np.sum(train_strip) /
                     (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model(X_val, y_val, mask_val, val_fn)
        class_rate.append(cr)

        if val_cost < best_val:
            # New best validation cost: record it and score the test split.
            best_val = val_cost
            best_conf = val_conf
            best_cr = cr
            test_cr, test_conf = evaluate_model(X_test, y_test, mask_test,
                                                val_fn)
            print(
                "Epoch {} train cost = {}, val cost = {}, "
                "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f}, Test CR= {:.3f} ({:.1f}sec)"
                .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                        test_cr,
                        time.time() - time_start))
        else:
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f} ({:.1f}sec)".
                  format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                         time.time() - time_start))

        if epoch >= VALIDATION_WINDOW and early_stop(val_window):
            break

        # learning rate decay
        if epoch > decay_start:
            lr.set_value(lr.get_value() * lr_decay)

    phrases = ['p1', 'p2', 'p3', 'p4', 'p5', 'p6', 'p7', 'p8', 'p9', 'p10']

    print('Final Model')
    print('CR: {}, val loss: {}, Test CR: {}'.format(best_cr, best_val,
                                                     test_cr))
    print('confusion matrix: ')
    if test_conf is not None:
        plot_confusion_matrix(test_conf, phrases, fmt='grid')
    plot_validation_cost(cost_train, cost_val, savefilename='valid_cost')

コード例 #34

ファイルを表示

ファイル: bimodal_diff_with_val.py プロジェクト: konatasick/ip-avsr

def main():
    """Train and evaluate the bimodal (image + difference-image) model.

    Reads the config file named by the CLI options, loads the dataset and
    splits it by subject into train/validation/test sets, builds the
    ``adenet_v2_2`` network on top of two pretrained encoders, and trains it
    with Adam. After every epoch the validation cost is computed; whenever
    it improves, the test set is evaluated and those test results become the
    ones reported at the end.

    CLI options ``validation_window``, ``num_epoch``, ``weight_init``,
    ``learning_rate`` and ``use_peepholes`` take precedence over the
    ``training`` section of the config file. When ``write_results`` is among
    the options, a ``test_cr,best_cr,best_val`` line is appended to that
    file.
    """
    configure_theano()
    options = parse_options()
    config_file = options['config']
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    print('CLI options: {}'.format(options.items()))

    print('Reading Config File: {}...'.format(config_file))
    print(config.items('data'))
    print(config.items('models'))
    print(config.items('training'))

    print('preprocessing dataset...')
    data = load_mat_file(config.get('data', 'images'))
    ae_pretrained = config.get('models', 'pretrained')
    ae_pretrained_diff = config.get('models', 'pretrained_diff')
    fusiontype = config.get('models', 'fusiontype')

    # capture training parameters (CLI option wins over the config file)
    validation_window = int(options['validation_window']) \
        if 'validation_window' in options else config.getint('training', 'validation_window')
    num_epoch = int(
        options['num_epoch']) if 'num_epoch' in options else config.getint(
            'training', 'num_epoch')
    weight_init = options[
        'weight_init'] if 'weight_init' in options else config.get(
            'training', 'weight_init')
    learning_rate = options['learning_rate'] if 'learning_rate' in options \
        else config.getfloat('training', 'learning_rate')
    use_peepholes = options[
        'use_peepholes'] if 'use_peepholes' in options else config.getboolean(
            'training', 'use_peepholes')
    epochsize = config.getint('training', 'epochsize')
    batchsize = config.getint('training', 'batchsize')
    windowsize = config.getint('training', 'windowsize')

    # Glorot-uniform is the fallback for any unrecognised weight_init value.
    weight_init_fn = las.init.GlorotUniform()
    if weight_init == 'glorot':
        weight_init_fn = las.init.GlorotUniform()
    if weight_init == 'norm':
        weight_init_fn = las.init.Normal(0.1)
    if weight_init == 'uniform':
        weight_init_fn = las.init.Uniform()
    if weight_init == 'ortho':
        weight_init_fn = las.init.Orthogonal()

    train_subject_ids = read_data_split_file('data/train.txt')
    val_subject_ids = read_data_split_file('data/val.txt')
    test_subject_ids = read_data_split_file('data/test.txt')

    # Flatten the per-sample vectors loaded from the .mat file to 1-D.
    data_matrix = data['dataMatrix']
    targets_vec = data['targetsVec'].reshape((-1, ))
    subjects_vec = data['subjectsVec'].reshape((-1, ))
    vidlen_vec = data['videoLengthVec'].reshape((-1, ))

    data_matrix = reorder_data(data_matrix, (30, 50))

    train_X, train_y, train_vidlens, train_subjects, \
    val_X, val_y, val_vidlens, val_subjects, \
    test_X, test_y, test_vidlens, test_subjects = split_seq_data(data_matrix, targets_vec, subjects_vec, vidlen_vec,
                                                                 train_subject_ids, val_subject_ids, test_subject_ids)

    # The difference images are computed from the raw frames, i.e. before
    # the per-sequence mean subtraction below overwrites *_X.
    train_X_diff = compute_diff_images(train_X, train_vidlens)
    val_X_diff = compute_diff_images(val_X, val_vidlens)
    test_X_diff = compute_diff_images(test_X, test_vidlens)

    train_X = sequencewise_mean_image_subtraction(train_X, train_vidlens)
    val_X = sequencewise_mean_image_subtraction(val_X, val_vidlens)
    test_X = sequencewise_mean_image_subtraction(test_X, test_vidlens)

    ae = load_dbn(ae_pretrained)
    ae_diff = load_dbn(ae_pretrained_diff)

    # IMPT: the encoder was trained with Fortran ordered images, so to visualize
    # convert all the images to C order using reshape_images_order()
    # output = dbn.predict(test_X)
    # test_X = reshape_images_order(test_X, (26, 44))
    # output = reshape_images_order(output, (26, 44))
    # visualize_reconstruction(test_X[:36, :], output[:36, :], shape=(26, 44))

    # Symbolic inputs of the compiled Theano functions.
    window = T.iscalar('theta')
    inputs = T.tensor3('inputs', dtype='float32')
    inputs_diff = T.tensor3('inputs_diff', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.imatrix('targets')

    print('constructing end to end model...')
    # NOTE(review): the literals 250 and 10 are presumably the LSTM size and
    # the number of output classes - confirm against adenet_v2_2.create_model.
    network, l_fuse = adenet_v2_2.create_model(ae,
                                               ae_diff, (None, None, 1500),
                                               inputs, (None, None),
                                               mask, (None, None, 1500),
                                               inputs_diff,
                                               250,
                                               window,
                                               10,
                                               fusiontype,
                                               w_init_fn=weight_init_fn,
                                               use_peepholes=use_peepholes)

    print_network(network)
    # draw_to_file(las.layers.get_all_layers(network), 'network.png')
    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = temporal_softmax_loss(predictions, targets, mask)
    updates = adam(cost, all_params, learning_rate=learning_rate)

    # `train` applies the Adam updates; `compute_train_cost` evaluates the
    # same (non-deterministic) cost expression without updating parameters.
    train = theano.function([inputs, targets, mask, inputs_diff, window],
                            cost,
                            updates=updates,
                            allow_input_downcast=True)
    compute_train_cost = theano.function(
        [inputs, targets, mask, inputs_diff, window],
        cost,
        allow_input_downcast=True)

    # Deterministic forward pass, used for validation and test.
    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = temporal_softmax_loss(test_predictions, targets, mask)
    compute_test_cost = theano.function(
        [inputs, targets, mask, inputs_diff, window],
        test_cost,
        allow_input_downcast=True)

    val_fn = theano.function([inputs, mask, inputs_diff, window],
                             test_predictions,
                             allow_input_downcast=True)

    # Train for `num_epoch` epochs of `epochsize` minibatches each.
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    STRIP_SIZE = 3
    val_window = circular_list(validation_window)
    train_strip = np.zeros((STRIP_SIZE, ))
    best_val = float('inf')
    best_tr = float('inf')
    best_cr = 0.0

    # These are only assigned when the validation cost improves. Initialise
    # them so the final report cannot hit a NameError when that never
    # happens (num_epoch == 0, or a validation cost that is NaN from the
    # first epoch, since `nan < best_val` is always False).
    test_cr = None
    test_conf = None
    adascale_param = None

    datagen = gen_lstm_batch_random(train_X,
                                    train_y,
                                    train_vidlens,
                                    batchsize=batchsize)
    integral_lens = compute_integral_len(train_vidlens)

    # Validation and test generators use one batch covering every video.
    val_datagen = gen_lstm_batch_random(val_X,
                                        val_y,
                                        val_vidlens,
                                        batchsize=len(val_vidlens))
    test_datagen = gen_lstm_batch_random(test_X,
                                         test_y,
                                         test_vidlens,
                                         batchsize=len(test_vidlens))

    # We'll use this "validation set" to periodically check progress
    X_val, y_val, mask_val, idxs_val = next(val_datagen)
    integral_lens_val = compute_integral_len(val_vidlens)
    X_diff_val = gen_seq_batch_from_idx(val_X_diff, idxs_val, val_vidlens,
                                        integral_lens_val, np.max(val_vidlens))

    # we use the test set to check final classification rate
    X_test, y_test, mask_test, idxs_test = next(test_datagen)
    integral_lens_test = compute_integral_len(test_vidlens)
    X_diff_test = gen_seq_batch_from_idx(test_X_diff, idxs_test, test_vidlens,
                                         integral_lens_test,
                                         np.max(test_vidlens))

    # reshape the targets for validation: keep the original targets for
    # evaluate_model2 and repeat each one across the mask's last axis for
    # the cost function.
    y_val_evaluate = y_val
    y_val = y_val.reshape((-1, 1)).repeat(mask_val.shape[-1], axis=-1)

    for epoch in range(num_epoch):
        time_start = time.time()
        for i in range(epochsize):
            X, y, m, batch_idxs = next(datagen)
            # repeat targets based on max sequence len
            y = y.reshape((-1, 1))
            y = y.repeat(m.shape[-1], axis=-1)
            X_diff = gen_seq_batch_from_idx(train_X_diff, batch_idxs,
                                            train_vidlens, integral_lens,
                                            np.max(train_vidlens))
            print_str = 'Epoch {} batch {}/{}: {} examples using adam'.format(
                epoch + 1, i + 1, epochsize, len(X))
            print(print_str, end='')
            sys.stdout.flush()
            train(X, y, m, X_diff, windowsize)
            # carriage return so the next progress line overwrites this one
            print('\r', end='')
        # The reported training cost is measured on the last minibatch of
        # the epoch only.
        cost = compute_train_cost(X, y, m, X_diff, windowsize)
        val_cost = compute_test_cost(X_val, y_val, mask_val, X_diff_val,
                                     windowsize)
        cost_train.append(cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = cost
        val_window.push(val_cost)

        # gl: percentage by which the current validation cost exceeds the
        #     lowest validation cost seen so far.
        # pk: how far the mean of the last STRIP_SIZE training costs is
        #     above their minimum (scaled by 1000).
        # pq: their ratio, printed as "GQ".
        # NOTE(review): train_strip starts as zeros, so until STRIP_SIZE
        # epochs have run its minimum is presumably 0 and pk is not
        # meaningful - confirm this is acceptable for the logged value.
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        pk = 1000 * (np.sum(train_strip) /
                     (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model2(X_val, y_val_evaluate, mask_val,
                                       X_diff_val, windowsize, val_fn)
        class_rate.append(cr)

        if val_cost < best_val:
            # New best validation cost: remember it and evaluate the test set
            # with the current parameters.
            best_val = val_cost
            best_tr = cost
            best_cr = cr
            if fusiontype == 'adasum':
                adascale_param = las.layers.get_all_param_values(
                    l_fuse, scaling_param=True)
            test_cr, test_conf = evaluate_model2(X_test, y_test, mask_test,
                                                 X_diff_test, windowsize,
                                                 val_fn)
            print(
                "Epoch {} train cost = {}, val cost = {}, "
                "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f}, Test CR= {:.3f} ({:.1f}sec)"
                .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                        test_cr,
                        time.time() - time_start))
        else:
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f} ({:.1f}sec)".
                  format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                         time.time() - time_start))

        # Early stopping is only considered once the window has been filled.
        if epoch >= validation_window and early_stop2(val_window, best_val,
                                                      validation_window):
            break

    if test_conf is None:
        # No epoch ever improved on the initial best_val, so there is no
        # test evaluation to report, plot or write out.
        print('No epoch improved the validation cost; no results to report.')
        return

    numbers = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']

    print('Final Model')
    print('CR: {}, val loss: {}, Test CR: {}'.format(best_cr, best_val,
                                                     test_cr))
    if fusiontype == 'adasum':
        print("final scaling params: {}".format(adascale_param))
    print('confusion matrix: ')
    plot_confusion_matrix(test_conf, numbers, fmt='latex')
    plot_validation_cost(cost_train, cost_val, savefilename='valid_cost')

    if 'write_results' in options:
        results_file = options['write_results']
        # Append so repeated runs accumulate one CSV line each.
        with open(results_file, mode='a') as f:
            f.write('{},{},{}\n'.format(test_cr, best_cr, best_val))

コード例 #35

ファイルを表示

ファイル: unimodal_nodelta.py プロジェクト: konatasick/ip-avsr

def main():
    """Train and evaluate the unimodal end-to-end model (no delta features).

    Reads ``config/normal.ini``, loads the dataset, splits it into train and
    test portions with ``create_split_index``, optionally fine-tunes, saves
    or loads the encoder according to the ``do_finetune`` /
    ``save_finetune`` / ``load_finetune`` config flags, then builds the
    ``baseline_end2end`` network and trains it with Adadelta. The test
    portion doubles as the validation set. The classification rate,
    validation loss and confusion matrix of the epoch with the lowest
    validation cost are reported at the end.
    """
    configure_theano()
    config_file = 'config/normal.ini'
    config = ConfigParser.ConfigParser()
    config.read(config_file)
    print('loading config file: {}'.format(config_file))

    print('preprocessing dataset...')
    data = load_mat_file(config.get('data', 'images'))
    ae_pretrained = config.get('models', 'pretrained')
    ae_finetuned = config.get('models', 'finetuned')
    learning_rate = float(config.get('training', 'learning_rate'))
    decay_rate = float(config.get('training', 'decay_rate'))
    decay_start = int(config.get('training', 'decay_start'))
    do_finetune = config.getboolean('training', 'do_finetune')
    save_finetune = config.getboolean('training', 'save_finetune')
    load_finetune = config.getboolean('training', 'load_finetune')

    # create the necessary variable mappings
    data_matrix = data['dataMatrix'].astype('float32')
    data_matrix_len = data_matrix.shape[0]
    targets_vec = data['targetsVec']
    vid_len_vec = data['videoLengthVec']
    iter_vec = data['iterVec']

    indexes = create_split_index(data_matrix_len, vid_len_vec, iter_vec)
    train_vidlen_vec, test_vidlen_vec = split_videolen(vid_len_vec, iter_vec)
    # Sanity checks tied to the expected dataset: 520 training videos, 260
    # test videos, and video lengths that add up to the number of rows.
    assert len(train_vidlen_vec) == 520
    assert len(test_vidlen_vec) == 260
    assert np.sum(vid_len_vec) == data_matrix_len

    # split the data
    # NOTE(review): the explicit `== True` / `== False` comparisons are kept
    # on purpose. They always yield a boolean mask, whereas indexing with
    # `indexes` directly would only be equivalent if it has a boolean dtype,
    # which is not visible here.
    train_data = data_matrix[indexes == True]
    train_targets = targets_vec[indexes == True]
    train_targets = train_targets.reshape((len(train_targets),))
    test_data = data_matrix[indexes == False]
    test_targets = targets_vec[indexes == False]
    test_targets = test_targets.reshape((len(test_targets),))

    # indexes for a particular letter
    # idx = [i for i, elem in enumerate(test_targets) if elem == 20]

    # resize the input data to 40 x 30
    # train_data_resized = resize_images(train_data).astype(np.float32)

    # normalize the inputs [0 - 1]
    # train_data_resized = normalize_input(train_data_resized, centralize=True)

    # test_data_resized = resize_images(test_data).astype(np.float32)
    # test_data_resized = normalize_input(test_data_resized, centralize=True)

    # NOTE(review): `dbn` is only bound when do_finetune or load_finetune is
    # set. With both disabled, the save step and create_model below raise
    # NameError - confirm the config always enables one of them.
    if do_finetune:
        print('fine-tuning...')
        dbn = load_dbn(ae_pretrained)
        dbn.initialize()
        # Fitting with the input as its own target, then previewing the
        # output on a slice of the test data.
        dbn.fit(train_data, train_data)
        res = dbn.predict(test_data)
        # print(res.shape)
        visualize_reconstruction(test_data[300:336], res[300:336])

    if save_finetune:
        # Use a context manager so the file is flushed and closed even if
        # pickling fails part-way through.
        with open(ae_finetuned, 'wb') as f:
            pickle.dump(dbn, f)

    if load_finetune:
        print('loading pre-trained encoding layers...')
        # NOTE(review): unpickling can execute arbitrary code; the path comes
        # from the config file, so only point it at trusted model files.
        with open(ae_finetuned, 'rb') as f:
            dbn = pickle.load(f)
        dbn.initialize()
        # res = dbn.predict(test_data)
        # visualize_reconstruction(test_data[300:336], res[300:336])
        # exit()

    # Hard-wired switch: the convolutional-autoencoder input path is disabled.
    load_convae = False
    if load_convae:
        print('loading pre-trained convolutional autoencoder...')
        encoder = load_model('models/conv_encoder_norm.dat')
        inputs = las.layers.get_all_layers(encoder)[0].input_var
    else:
        inputs = T.tensor3('inputs', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.ivector('targets')
    # The learning rate lives in a shared variable so it can be decayed
    # between epochs without recompiling the training function.
    lr = theano.shared(np.array(learning_rate, dtype=theano.config.floatX), name='learning_rate')
    lr_decay = np.array(decay_rate, dtype=theano.config.floatX)

    print('constructing end to end model...')
    network = baseline_end2end.create_model(dbn, (None, None, 1200), inputs,
                                            (None, None), mask, 250)

    print_network(network)
    # draw_to_file(las.layers.get_all_layers(network), 'network.png', verbose=True)
    # exit()
    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = T.mean(las.objectives.categorical_crossentropy(predictions, targets))
    updates = las.updates.adadelta(cost, all_params, learning_rate=lr)
    # updates = las.updates.adam(cost, all_params, learning_rate=lr)

    # Hard-wired switch: the max-norm constraint on weights is disabled.
    use_max_constraint = False
    if use_max_constraint:
        MAX_NORM = 4
        for param in las.layers.get_all_params(network, regularizable=True):
            if param.ndim > 1:  # only apply to dimensions larger than 1, exclude biases
                updates[param] = norm_constraint(param, MAX_NORM * las.utils.compute_norms(param.get_value()).mean())

    # `train` applies the updates; `compute_train_cost` evaluates the same
    # cost expression without updating parameters.
    train = theano.function(
        [inputs, targets, mask],
        cost, updates=updates, allow_input_downcast=True)
    compute_train_cost = theano.function([inputs, targets, mask], cost, allow_input_downcast=True)

    # Deterministic forward pass, used for validation.
    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = T.mean(las.objectives.categorical_crossentropy(test_predictions, targets))
    compute_test_cost = theano.function(
        [inputs, targets, mask], test_cost, allow_input_downcast=True)

    val_fn = theano.function([inputs, mask], test_predictions, allow_input_downcast=True)

    # Train for up to NUM_EPOCHS epochs of EPOCH_SIZE minibatches each.
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    NUM_EPOCHS = 30
    EPOCH_SIZE = 20
    BATCH_SIZE = 26
    WINDOW_SIZE = 9
    STRIP_SIZE = 3
    MAX_LOSS = 0.2
    VALIDATION_WINDOW = 4
    val_window = circular_list(VALIDATION_WINDOW)
    train_strip = np.zeros((STRIP_SIZE,))
    best_val = float('inf')
    best_conf = None
    best_cr = 0.0

    datagen = gen_lstm_batch_random(train_data, train_targets, train_vidlen_vec, batchsize=BATCH_SIZE)
    # The validation generator yields one batch covering every test video.
    val_datagen = gen_lstm_batch_random(test_data, test_targets, test_vidlen_vec,
                                        batchsize=len(test_vidlen_vec))

    # We'll use this "validation set" to periodically check progress
    X_val, y_val, mask_val, idxs_val = next(val_datagen)

    def early_stop(cost_window):
        """Return True when every cost in the window exceeds the previous one.

        A window with fewer than two entries never triggers a stop.
        NOTE(review): this assumes iterating ``cost_window`` yields the costs
        oldest-first - confirm against the circular_list implementation.
        """
        if len(cost_window) < 2:
            return False
        else:
            curr = cost_window[0]
            for idx, cost in enumerate(cost_window):
                # idx == 0 only seeds `curr`; afterwards any cost that is not
                # strictly larger than its predecessor rules out a stop.
                if curr < cost or idx == 0:
                    curr = cost
                else:
                    return False
            return True

    for epoch in range(NUM_EPOCHS):
        time_start = time.time()
        for i in range(EPOCH_SIZE):
            X, y, m, batch_idxs = next(datagen)
            print_str = 'Epoch {} batch {}/{}: {} examples at learning rate = {:.4f}'.format(
                epoch + 1, i + 1, EPOCH_SIZE, len(X), float(lr.get_value()))
            print(print_str, end='')
            sys.stdout.flush()
            train(X, y, m)
            # carriage return so the next progress line overwrites this one
            print('\r', end='')
        # The reported training cost is measured on the last minibatch of
        # the epoch only.
        cost = compute_train_cost(X, y, m)
        val_cost = compute_test_cost(X_val, y_val, mask_val)
        cost_train.append(cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = cost
        val_window.push(val_cost)

        # gl: percentage by which the current validation cost exceeds the
        #     lowest validation cost seen so far.
        # pk: how far the mean of the last STRIP_SIZE training costs is
        #     above their minimum (scaled by 1000).
        # pq: their ratio, printed as "GQ".
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        pk = 1000 * (np.sum(train_strip) / (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model(X_val, y_val, mask_val, val_fn)
        class_rate.append(cr)

        print("Epoch {} train cost = {}, validation cost = {}, "
              "generalization loss = {:.3f}, GQ = {:.3f}, classification rate = {:.3f} ({:.1f}sec)"
              .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr, time.time() - time_start))

        if val_cost < best_val:
            best_val = val_cost
            best_conf = val_conf
            best_cr = cr

        # Early stopping is only considered once the window has been filled.
        if epoch >= VALIDATION_WINDOW and early_stop(val_window):
            break

        # learning rate decay
        if epoch > decay_start:  # 20, 8
            lr.set_value(lr.get_value() * lr_decay)

    letters = ['a', 'b', 'c', 'd', 'e', 'f', 'g',
               'h', 'i', 'j', 'k', 'l', 'm', 'n',
               'o', 'p', 'q', 'r', 's', 't', 'u',
               'v', 'w', 'x', 'y', 'z']

    print('Best Model')
    print('classification rate: {}, validation loss: {}'.format(best_cr, best_val))
    print('confusion matrix: ')
    plot_confusion_matrix(best_conf, letters, fmt='grid')
    plot_validation_cost(cost_train, cost_val, class_rate, 'e2e_valid_cost')

コード例 #36

ファイルを表示

ファイル: 1stream_variable_lr.py プロジェクト: lzuwei/ip-avsr

def main():
    """Train a single-stream LSTM classifier with per-layer learning rates.

    Reads the config file named by the CLI options, loads the dataset,
    splits it by subject into train/validation/test sets, applies the
    preprocessing steps enabled in the ``stream1`` config section, builds
    the ``deltanet_majority_vote`` network and trains it with
    ``custom.updates.adam_vlr``, which takes a learning-rate map built from
    ``lr_config`` and a default rate. After the fourth epoch the ``fc1``,
    ``fc2`` and ``fc3`` learning rates are deliberately set to 100.0 to
    demonstrate that the per-layer rates take effect.

    Whenever the validation cost improves, the test set is evaluated and
    the network parameters are snapshotted. Optional CLI options:
    ``save_plot`` (prefix for the loss plot and confusion-matrix file),
    ``write_results`` (file to append a ``test_cr,best_cr,best_val`` line
    to) and ``save_best`` (path for the best parameters).
    """
    configure_theano()
    options = parse_options()
    config_file = options['config']
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    print('CLI options: {}'.format(options.items()))

    print('Reading Config File: {}...'.format(config_file))
    print(config.items('stream1'))
    print(config.items('lstm_classifier'))
    print(config.items('training'))

    print('preprocessing dataset...')
    data = load_mat_file(config.get('stream1', 'data'))
    stream1 = config.get('stream1', 'model')
    # 'imagesize' is a comma-separated list of integers in the config file.
    imagesize = tuple(int(d) for d in config.get('stream1', 'imagesize').split(','))
    stream1_dim = config.getint('stream1', 'input_dimensions')
    stream1_shape = config.get('stream1', 'shape')
    stream1_nonlinearities = config.get('stream1', 'nonlinearities')

    # lstm classifier
    output_classes = config.getint('lstm_classifier', 'output_classes')
    output_classnames = config.get('lstm_classifier', 'output_classnames').split(',')
    lstm_size = config.getint('lstm_classifier', 'lstm_size')
    matlab_target_offset = config.getboolean('lstm_classifier', 'matlab_target_offset')

    # data preprocessing options
    reorderdata = config.getboolean('stream1', 'reorderdata')
    diffimage = config.getboolean('stream1', 'diffimage')
    meanremove = config.getboolean('stream1', 'meanremove')
    samplewisenormalize = config.getboolean('stream1', 'samplewisenormalize')
    featurewisenormalize = config.getboolean('stream1', 'featurewisenormalize')

    # lstm classifier configurations (CLI option wins over the config file)
    weight_init = options['weight_init'] if 'weight_init' in options else config.get('lstm_classifier', 'weight_init')
    use_peepholes = options['use_peepholes'] if 'use_peepholes' in options else config.getboolean('lstm_classifier',
                                                                                                  'use_peepholes')
    windowsize = config.getint('lstm_classifier', 'windowsize')

    # capture training parameters
    validation_window = int(options['validation_window']) \
        if 'validation_window' in options else config.getint('training', 'validation_window')
    num_epoch = int(options['num_epoch']) if 'num_epoch' in options else config.getint('training', 'num_epoch')
    learning_rate = options['learning_rate'] if 'learning_rate' in options \
        else config.getfloat('training', 'learning_rate')

    epochsize = config.getint('training', 'epochsize')
    batchsize = config.getint('training', 'batchsize')

    # Glorot-uniform is the fallback for any unrecognised weight_init value.
    weight_init_fn = las.init.GlorotUniform()
    if weight_init == 'glorot':
        weight_init_fn = las.init.GlorotUniform()
    if weight_init == 'norm':
        weight_init_fn = las.init.Normal(0.1)
    if weight_init == 'uniform':
        weight_init_fn = las.init.Uniform()
    if weight_init == 'ortho':
        weight_init_fn = las.init.Orthogonal()

    train_subject_ids = read_data_split_file(config.get('training', 'train_subjects_file'))
    val_subject_ids = read_data_split_file(config.get('training', 'val_subjects_file'))
    test_subject_ids = read_data_split_file(config.get('training', 'test_subjects_file'))

    # Flatten the per-sample vectors loaded from the .mat file to 1-D.
    data_matrix = data['dataMatrix'].astype('float32')
    targets_vec = data['targetsVec'].reshape((-1,))
    subjects_vec = data['subjectsVec'].reshape((-1,))
    vidlen_vec = data['videoLengthVec'].reshape((-1,))

    if reorderdata:
        data_matrix = reorder_data(data_matrix, (imagesize[0], imagesize[1]))

    train_X, train_y, train_vidlens, train_subjects, \
    val_X, val_y, val_vidlens, val_subjects, \
    test_X, test_y, test_vidlens, test_subjects = split_seq_data(data_matrix, targets_vec, subjects_vec, vidlen_vec,
                                                                 train_subject_ids, val_subject_ids, test_subject_ids)
    if matlab_target_offset:
        # Shift targets down by one (the flag name suggests 1-based MATLAB
        # labels being converted to 0-based).
        train_y -= 1
        val_y -= 1
        test_y -= 1

    if meanremove:
        train_X = sequencewise_mean_image_subtraction(train_X, train_vidlens)
        val_X = sequencewise_mean_image_subtraction(val_X, val_vidlens)
        test_X = sequencewise_mean_image_subtraction(test_X, test_vidlens)

    if diffimage:
        train_X = compute_diff_images(train_X, train_vidlens)
        val_X = compute_diff_images(val_X, val_vidlens)
        test_X = compute_diff_images(test_X, test_vidlens)

    if samplewisenormalize:
        train_X = normalize_input(train_X)
        val_X = normalize_input(val_X)
        test_X = normalize_input(test_X)

    if featurewisenormalize:
        # Validation and test data are normalised with the statistics
        # returned for the training data.
        train_X, mean, std = featurewise_normalize_sequence(train_X)
        val_X = (val_X - mean) / std
        test_X = (test_X - mean) / std

    ae1 = load_decoder(stream1, stream1_shape, stream1_nonlinearities)

    # IMPT: the encoder was trained with Fortran ordered images, so to visualize
    # convert all the images to C order using reshape_images_order()
    # output = dbn.predict(test_X)
    # test_X = reshape_images_order(test_X, (26, 44))
    # output = reshape_images_order(output, (26, 44))
    # visualize_reconstruction(test_X[:36, :], output[:36, :], shape=(26, 44))

    # Symbolic inputs of the compiled Theano functions.
    window = T.iscalar('theta')
    inputs1 = T.tensor3('inputs1', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.imatrix('targets')

    print('constructing end to end model...')
    network = deltanet_majority_vote.create_model(ae1, (None, None, stream1_dim), inputs1,
                                                  (None, None), mask,
                                                  lstm_size, window, output_classes,
                                                  weight_init_fn, use_peepholes)

    print_network(network)
    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = temporal_softmax_loss(predictions, targets, mask)
    default_learning_rate = theano.shared(las.utils.floatX(learning_rate), 'default_lr')
    # Learning rates for fc1/fc2/fc3 are shared variables so they can be
    # changed during training (see the end of the epoch loop).
    lr_config = {
        'fc1': theano.shared(las.utils.floatX(0.001)),
        'fc2': theano.shared(las.utils.floatX(0.001)),
        'fc3': theano.shared(las.utils.floatX(0.001))
    }
    lr_map = custom.updates.generate_lr_map(all_params, lr_config, default_learning_rate)
    # updates = adam(cost, all_params, default_learning_rate)
    updates = custom.updates.adam_vlr(cost, all_params, lr_map)

    # `train` applies the updates; `compute_train_cost` evaluates the same
    # cost expression without updating parameters.
    train = theano.function(
        [inputs1, targets, mask, window],
        cost, updates=updates, allow_input_downcast=True)
    compute_train_cost = theano.function([inputs1, targets, mask, window],
                                         cost, allow_input_downcast=True)

    # Deterministic forward pass, used for validation and test.
    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = temporal_softmax_loss(test_predictions, targets, mask)
    compute_test_cost = theano.function(
        [inputs1, targets, mask, window], test_cost, allow_input_downcast=True)

    val_fn = theano.function([inputs1, mask, window], test_predictions, allow_input_downcast=True)

    # Train for `num_epoch` epochs of `epochsize` minibatches each.
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    STRIP_SIZE = 3
    val_window = circular_list(validation_window)
    train_strip = np.zeros((STRIP_SIZE,))
    best_val = float('inf')
    best_cr = 0.0

    # These are only assigned when the validation cost improves. Initialise
    # them so the final report and the 'save_best' step cannot hit a
    # NameError when that never happens (num_epoch == 0, or a validation
    # cost that is NaN from the first epoch, since `nan < best_val` is
    # always False).
    test_cr = None
    test_conf = None
    best_params = None

    datagen = gen_lstm_batch_random(train_X, train_y, train_vidlens, batchsize=batchsize)

    # Validation and test generators use one batch covering every video.
    val_datagen = gen_lstm_batch_random(val_X, val_y, val_vidlens, batchsize=len(val_vidlens))
    test_datagen = gen_lstm_batch_random(test_X, test_y, test_vidlens, batchsize=len(test_vidlens))

    # We'll use this "validation set" to periodically check progress
    X_val, y_val, mask_val, idxs_val = next(val_datagen)

    # we use the test set to check final classification rate
    X_test, y_test, mask_test, idxs_test = next(test_datagen)

    # reshape the targets for validation: keep the original targets for
    # evaluate_model2 and repeat each one across the mask's last axis for
    # the cost function.
    y_val_evaluate = y_val
    y_val = y_val.reshape((-1, 1)).repeat(mask_val.shape[-1], axis=-1)

    for epoch in range(num_epoch):
        time_start = time.time()
        for i in range(epochsize):
            X, y, m, batch_idxs = next(datagen)
            # repeat targets based on max sequence len
            y = y.reshape((-1, 1))
            y = y.repeat(m.shape[-1], axis=-1)
            print_str = 'Epoch {} batch {}/{}: {} examples using adam with learning rate = {}'.format(
                epoch + 1, i + 1, epochsize, len(X), learning_rate)
            print(print_str, end='')
            sys.stdout.flush()
            train(X, y, m, windowsize)
            # carriage return so the next progress line overwrites this one
            print('\r', end='')
        # The reported training cost is measured on the last minibatch of
        # the epoch only.
        cost = compute_train_cost(X, y, m, windowsize)
        val_cost = compute_test_cost(X_val, y_val, mask_val, windowsize)
        cost_train.append(cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = cost
        val_window.push(val_cost)

        # gl: percentage by which the current validation cost exceeds the
        #     lowest validation cost seen so far.
        # pk: how far the mean of the last STRIP_SIZE training costs is
        #     above their minimum (scaled by 1000).
        # pq: their ratio, printed as "GQ".
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        pk = 1000 * (np.sum(train_strip) / (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model2(X_val, y_val_evaluate, mask_val, windowsize, val_fn)
        class_rate.append(cr)

        if val_cost < best_val:
            # New best validation cost: evaluate the test set and snapshot
            # the current parameters.
            best_val = val_cost
            best_cr = cr
            test_cr, test_conf = evaluate_model2(X_test, y_test, mask_test, windowsize, val_fn)
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f}, Test CR= {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr, test_cr, time.time() - time_start))
            best_params = las.layers.get_all_param_values(network)
        else:
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr, time.time() - time_start))

        # Early stopping is only considered once the window has been filled.
        if epoch >= validation_window and early_stop2(val_window, best_val, validation_window):
            break

        # Show that learning rates are changed by exploding learning rates for encoder layers
        # The training loss should increase dramatically and learning should diverge
        if epoch + 1 == 4:
            print('explode fc1,fc2,fc3 learning rates to 100.0')
            lr_config['fc1'].set_value(100.0)
            lr_config['fc2'].set_value(100.0)
            lr_config['fc3'].set_value(100.0)

    if test_conf is None:
        # No epoch ever improved on the initial best_val, so there is no
        # test evaluation or parameter snapshot to report or save.
        print('No epoch improved the validation cost; no results to report.')
        return

    print('Final Model')
    print('CR: {}, val loss: {}, Test CR: {}'.format(best_cr, best_val, test_cr))

    # plot confusion matrix
    table_str = plot_confusion_matrix(test_conf, output_classnames, fmt='pipe')
    print('confusion matrix: ')
    print(table_str)

    if 'save_plot' in options:
        prefix = options['save_plot']
        plot_validation_cost(cost_train, cost_val, savefilename='{}.validloss.png'.format(prefix))
        # Append so repeated runs accumulate their confusion matrices.
        with open('{}.confmat.txt'.format(prefix), mode='a') as f:
            f.write(table_str)
            f.write('\n\n')

    if 'write_results' in options:
        print('writing results to {}'.format(options['write_results']))
        results_file = options['write_results']
        with open(results_file, mode='a') as f:
            f.write('{},{},{}\n'.format(test_cr, best_cr, best_val))

    if 'save_best' in options:
        print('saving best model...')
        # Restore the snapshot taken at the best validation cost before saving.
        las.layers.set_all_param_values(network, best_params)
        save_model_params(network, options['save_best'])
        print('best model saved to {}'.format(options['save_best']))