Ejemplo n.º 1
0
def main():
    """Launch the video-only 'unimodal' experiment logged as 'lrs2_vid2chars'.

    Builds the video and label TFRecord paths under ``./data/``, a
    one-entry iteration/learning-rate schedule, and forwards everything to
    ``run_experiment`` as keyword arguments.
    """
    data_dir = './data/'

    # Single-entry schedules; the original author labelled the entry "clean".
    # NOTE(review): what each element of a pair means is decided by
    # run_experiment, which is not visible here.
    stage_iterations = ((100, 20),)
    stage_learning_rates = ((0.001, 0.0001),)

    # Keys are kept in the same order as the original keyword arguments.
    experiment_kwargs = dict(
        video_train_record=data_dir + 'rgb36lips_train_success_aus.tfrecord',
        video_test_record=data_dir + 'rgb36lips_test_success_aus.tfrecord',
        labels_train_record=data_dir + 'characters_train_success.tfrecord',
        labels_test_record=data_dir + 'characters_test_success.tfrecord',
        iterations=stage_iterations,
        learning_rates=stage_learning_rates,
        architecture='unimodal',
        logfile='lrs2_vid2chars',
        video_processing='resnet_cnn',
        input_modality='video',
        regress_aus=True,
    )
    run_experiment(**experiment_kwargs)
Ejemplo n.º 2
0
def main():
    """Launch the 'av_align' audio-visual experiment logged as 'lrs2_avalign'.

    Collects the video, label and audio TFRecord paths under ``./data/``
    plus a four-entry schedule and passes them to ``run_experiment``. The
    four audio records and the four schedule entries were labelled
    clean / 10db / 0db / -5db by the original author (presumably one
    schedule entry per audio record -- confirm against run_experiment).
    """
    def in_records_dir(filename):
        # Every record sits in the same relative directory.
        return './data/' + filename

    train_audio = tuple(in_records_dir(name) for name in (
        'logmel_train_success_clean.tfrecord',
        'logmel_train_success_cafe_10db.tfrecord',
        'logmel_train_success_cafe_0db.tfrecord',
        'logmel_train_success_cafe_-5db.tfrecord',
    ))
    test_audio = tuple(in_records_dir(name) for name in (
        'logmel_test_success_clean.tfrecord',
        'logmel_test_success_cafe_10db.tfrecord',
        'logmel_test_success_cafe_0db.tfrecord',
        'logmel_test_success_cafe_-5db.tfrecord',
    ))

    # All four entries share the same settings, so repeat one pair.
    entry_count = 4  # clean, 10db, 0db, -5db
    schedule_iterations = ((100, 20),) * entry_count
    schedule_learning_rates = ((0.001, 0.0001),) * entry_count

    run_experiment(
        video_train_record=in_records_dir(
            'rgb36lips_train_success_aus.tfrecord'),
        video_test_record=in_records_dir(
            'rgb36lips_test_success_aus.tfrecord'),
        labels_train_record=in_records_dir(
            'characters_train_success.tfrecord'),
        labels_test_record=in_records_dir(
            'characters_test_success.tfrecord'),
        audio_train_records=train_audio,
        audio_test_records=test_audio,
        iterations=schedule_iterations,
        learning_rates=schedule_learning_rates,
        architecture='av_align',
        regress_aus=True,
        audio_processing='features',
        video_processing='resnet_cnn',
        logfile='lrs2_avalign',
    )
Ejemplo n.º 3
0
def main():
    """Launch the 'unimodal' experiment logged as 'tcd_audio_sd'.

    Passes video, label and audio TFRecord paths plus a four-entry
    schedule to ``run_experiment``.

    NOTE(review): every path is an absolute, machine-specific location
    under /run/media/john_tukey/...; adjust before running elsewhere.
    """
    # Each row pairs one iterations entry with its learning-rate entry;
    # the trailing labels are the original author's.
    schedule = (
        ((200, 20), (0.001, 0.0001)),  # clean
        ((100, 20), (0.001, 0.0001)),  # 10db
        ((100, 20), (0.001, 0.0001)),  # 0db
        ((0, 40), (0, 0.0001)),        # -5db
    )
    # Unzip the rows back into the two parallel tuples run_experiment takes.
    stage_iterations, stage_learning_rates = zip(*schedule)

    run_experiment(
        video_train_record='/run/media/john_tukey/download/datasets/tcdtimit/tfrecords/rgb36lips_train_sd_aus.tfrecord',
        video_test_record='/run/media/john_tukey/download/datasets/tcdtimit/tfrecords/rgb36lips_test_sd_aus.tfrecord',
        labels_train_record='/run/media/john_tukey/download/datasets/tcdtimit/tfrecords/characters_train_sd.tfrecord',
        labels_test_record='/run/media/john_tukey/download/datasets/tcdtimit/tfrecords/characters_test_sd.tfrecord',
        audio_train_records=(
            '/run/media/john_tukey/download/datasets/tcdtimit/tfrecords/logmel_train_sd_clean.tfrecord',
            '/run/media/john_tukey/download/datasets/tcdtimit/tfrecords/logmel_train_sd_cafe_10db.tfrecord',
            '/run/media/john_tukey/download/datasets/tcdtimit/tfrecords/logmel_train_sd_cafe_0db.tfrecord',
            '/run/media/john_tukey/download/datasets/tcdtimit/tfrecords/logmel_train_sd_cafe_-5db.tfrecord',
        ),
        audio_test_records=(
            '/run/media/john_tukey/download/datasets/tcdtimit/tfrecords/logmel_test_sd_clean.tfrecord',
            '/run/media/john_tukey/download/datasets/tcdtimit/tfrecords/logmel_test_sd_cafe_10db.tfrecord',
            '/run/media/john_tukey/download/datasets/tcdtimit/tfrecords/logmel_test_sd_cafe_0db.tfrecord',
            '/run/media/john_tukey/download/datasets/tcdtimit/tfrecords/logmel_test_sd_cafe_-5db.tfrecord',
        ),
        iterations=stage_iterations,
        learning_rates=stage_learning_rates,
        architecture='unimodal',
        logfile='tcd_audio_sd',
    )
Ejemplo n.º 4
0
def main():
    """Launch the audio-only 'unimodal' experiment logged as 'lrs2_audio'.

    Only label and audio TFRecords (all under ``./data/``) are supplied;
    no video records are passed to ``run_experiment``.
    """
    data_dir = './data/'

    train_audio = (
        data_dir + 'logmel_train_success_clean.tfrecord',
        data_dir + 'logmel_train_success_cafe_10db.tfrecord',
        data_dir + 'logmel_train_success_cafe_0db.tfrecord',
        data_dir + 'logmel_train_success_cafe_-5db.tfrecord',
    )
    test_audio = (
        data_dir + 'logmel_test_success_clean.tfrecord',
        data_dir + 'logmel_test_success_cafe_10db.tfrecord',
        data_dir + 'logmel_test_success_cafe_0db.tfrecord',
        data_dir + 'logmel_test_success_cafe_-5db.tfrecord',
    )

    # Four identical schedule entries, labelled clean / 10db / 0db / -5db
    # by the original author.
    per_entry_iterations = (100, 20)
    per_entry_learning_rates = (0.001, 0.0001)

    # Keys are kept in the same order as the original keyword arguments.
    experiment_kwargs = {
        'labels_train_record': data_dir + 'characters_train_success.tfrecord',
        'labels_test_record': data_dir + 'characters_test_success.tfrecord',
        'audio_train_records': train_audio,
        'audio_test_records': test_audio,
        'iterations': (per_entry_iterations,) * 4,
        'learning_rates': (per_entry_learning_rates,) * 4,
        'architecture': 'unimodal',
        'logfile': 'lrs2_audio',
        'audio_processing': 'features',
    }
    run_experiment(**experiment_kwargs)
Ejemplo n.º 5
0
def main(argv):
    """Launch the audio-visual experiment with the 'av_transformer' architecture.

    Side effects before the run: sets ``CUDA_VISIBLE_DEVICES`` from
    ``FLAGS.gpu_id`` and overwrites ``FLAGS.architecture``. The
    architecture is communicated only through ``FLAGS``; it is not passed
    to ``run_experiment`` as an argument.

    Args:
        argv: Not used inside this function.
    """
    # Keep these two side effects first and in this order.
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_id
    FLAGS.architecture = 'av_transformer'

    data_dir = './data/'

    train_audio = (
        data_dir + 'logmel_train_success_clean.tfrecord',
        data_dir + 'logmel_train_success_cafe_10db.tfrecord',
        data_dir + 'logmel_train_success_cafe_0db.tfrecord',
        data_dir + 'logmel_train_success_cafe_-5db.tfrecord',
    )
    test_audio = (
        data_dir + 'logmel_test_success_clean.tfrecord',
        data_dir + 'logmel_test_success_cafe_10db.tfrecord',
        data_dir + 'logmel_test_success_cafe_0db.tfrecord',
        data_dir + 'logmel_test_success_cafe_-5db.tfrecord',
    )

    # Each iterations entry carries its condition label as a third element.
    condition_labels = ('clean', '10db', '0db', '-5db')
    labelled_iterations = tuple(
        (100, 20, label) for label in condition_labels
    )
    # One identical learning-rate pair per condition label.
    stage_learning_rates = ((0.001, 0.0001),) * len(condition_labels)

    run_experiment(
        video_train_record=data_dir + 'rgb36lips_train_success_aus.tfrecord',
        video_test_record=data_dir + 'rgb36lips_test_success_aus.tfrecord',
        labels_train_record=data_dir + 'characters_train_success.tfrecord',
        labels_test_record=data_dir + 'characters_test_success.tfrecord',
        audio_train_records=train_audio,
        audio_test_records=test_audio,
        iterations=labelled_iterations,
        learning_rates=stage_learning_rates,
    )
Ejemplo n.º 6
0
def main(config, mode='train'):
    """Run one experiment described by ``config`` in the given ``mode``.

    Builds the train / trainTest / test TFRecord paths for the dataset
    named by ``config['dataset']`` and forwards them, together with the
    remaining settings read from ``config``, to ``run_experiment``.

    Only one audio condition is used: each audio tuple, as well as the
    ``iterations`` and ``learning_rates`` tuples, holds a single entry
    selected through ``config``.

    NOTE(review): the record and unit-list locations are hard-coded
    Windows drive paths (``N:/`` and ``F:/Documents``).

    Args:
        config: Mapping indexed with the string keys 'dataset', 'snr',
            'iterations', 'learning_rate', 'architecture',
            'experiment_path', 'experiment_name', 'cell_type',
            'encoder_units_per_layer', 'cost_per_sample', 'batch_size',
            'max_label_length', 'decoder_units_per_layer',
            'write_summary' and 'set_data_null'. 'dataset' and 'snr' are
            concatenated into paths, so they must be strings.
        mode: Forwarded to ``run_experiment``. Any value other than
            'train' enables ``write_eval_data``.
    """
    dataset_name = config['dataset']
    record_dir = 'N:/datasets/' + dataset_name + '/tfrecords/'
    unit_list_file = 'F:/Documents/datasets/' + dataset_name + '/misc/character_list'

    # The same SNR tag selects the audio record of every split.
    snr = config['snr']
    train_audio = (record_dir + 'logmel_train_' + snr + '.tfrecord',)
    train_test_audio = (record_dir + 'logmel_trainTest_' + snr + '.tfrecord',)
    test_audio = (record_dir + 'logmel_test_' + snr + '.tfrecord',)

    # Single-entry schedules wrapping the configured values.
    stage_iterations = (config['iterations'],)
    stage_learning_rates = (config['learning_rate'],)

    run_experiment(
        video_train_record=record_dir + 'rgb36lips_train.tfrecord',
        video_trainTest_record=record_dir + 'rgb36lips_trainTest.tfrecord',
        video_test_record=record_dir + 'rgb36lips_test.tfrecord',
        labels_train_record=record_dir + 'characters_train.tfrecord',
        labels_trainTest_record=record_dir + 'characters_trainTest.tfrecord',
        labels_test_record=record_dir + 'characters_test.tfrecord',
        audio_train_records=train_audio,
        audio_trainTest_records=train_test_audio,
        audio_test_records=test_audio,
        iterations=stage_iterations,
        learning_rates=stage_learning_rates,
        architecture=config['architecture'],
        logfile=config['experiment_path'] + config['experiment_name'],
        unit_list_file=unit_list_file,
        cell_type=config['cell_type'],
        encoder_units_per_layer=config['encoder_units_per_layer'],
        cost_per_sample=config['cost_per_sample'],
        experiment_name=config['experiment_name'],
        experiment_path=config['experiment_path'],
        dataset_name=dataset_name,
        batch_size=config['batch_size'],
        write_attention_alignment=True,
        max_label_length=config['max_label_length'],
        decoder_units_per_layer=config['decoder_units_per_layer'],
        write_summary=config['write_summary'],
        # Evaluation data is written for every mode except training.
        write_eval_data=mode != 'train',
        set_data_null=config['set_data_null'],
        snr=snr,
        mode=mode,
    )