def main():
    """Launch the video-only ``unimodal`` experiment logged as ``lrs2_vid2chars``.

    Builds the video and label TFRecord paths under ``./data/`` and hands
    them to ``run_experiment`` together with a single-entry training
    schedule. No audio records are passed.
    """
    data_dir = './data/'

    # A single schedule entry, labelled "clean" in the original configuration.
    # NOTE(review): how each pair is interpreted is decided by run_experiment,
    # which is not visible here -- confirm before changing the numbers.
    schedule = ((100, 20),)
    rates = ((0.001, 0.0001),)

    run_experiment(
        video_train_record=data_dir + 'rgb36lips_train_success_aus.tfrecord',
        video_test_record=data_dir + 'rgb36lips_test_success_aus.tfrecord',
        labels_train_record=data_dir + 'characters_train_success.tfrecord',
        labels_test_record=data_dir + 'characters_test_success.tfrecord',
        iterations=schedule,
        learning_rates=rates,
        architecture='unimodal',
        logfile='lrs2_vid2chars',
        video_processing='resnet_cnn',
        input_modality='video',
        regress_aus=True,
    )
def main():
    """Launch the ``av_align`` experiment logged as ``lrs2_avalign``.

    Collects the video, label and log-mel audio TFRecord paths under
    ``./data/`` and forwards them to ``run_experiment``. The audio tuples
    list four conditions in the order: clean, 10db, 0db, -5db; the
    ``iterations`` and ``learning_rates`` tuples carry one entry per
    condition, in the same order.
    """
    data_dir = './data/'

    train_video = data_dir + 'rgb36lips_train_success_aus.tfrecord'
    test_video = data_dir + 'rgb36lips_test_success_aus.tfrecord'
    train_labels = data_dir + 'characters_train_success.tfrecord'
    test_labels = data_dir + 'characters_test_success.tfrecord'

    # Order matters: clean, 10db, 0db, -5db.
    train_audio = (
        data_dir + 'logmel_train_success_clean.tfrecord',
        data_dir + 'logmel_train_success_cafe_10db.tfrecord',
        data_dir + 'logmel_train_success_cafe_0db.tfrecord',
        data_dir + 'logmel_train_success_cafe_-5db.tfrecord',
    )
    test_audio = (
        data_dir + 'logmel_test_success_clean.tfrecord',
        data_dir + 'logmel_test_success_cafe_10db.tfrecord',
        data_dir + 'logmel_test_success_cafe_0db.tfrecord',
        data_dir + 'logmel_test_success_cafe_-5db.tfrecord',
    )

    # Every audio condition uses the same schedule entry and the same
    # learning-rate pair, so repeat one entry per condition.
    schedule = ((100, 20),) * len(train_audio)
    rates = ((0.001, 0.0001),) * len(train_audio)

    run_experiment(
        video_train_record=train_video,
        video_test_record=test_video,
        labels_train_record=train_labels,
        labels_test_record=test_labels,
        audio_train_records=train_audio,
        audio_test_records=test_audio,
        iterations=schedule,
        learning_rates=rates,
        architecture='av_align',
        regress_aus=True,
        audio_processing='features',
        video_processing='resnet_cnn',
        logfile='lrs2_avalign',
    )
def main():
    """Launch the ``unimodal`` experiment logged as ``tcd_audio_sd``.

    All TFRecord files live in one directory, so the directory is named once
    in ``records_path`` and each file name is appended to it. The resulting
    path strings are the same as the fully spelled-out literals this replaces.

    The audio tuples list four conditions in the order: clean, 10db, 0db,
    -5db; ``iterations`` and ``learning_rates`` carry one entry per
    condition, in the same order.
    """
    # Single place to change when the dataset is moved.
    records_path = '/run/media/john_tukey/download/datasets/tcdtimit/tfrecords/'

    video_train_record = records_path + 'rgb36lips_train_sd_aus.tfrecord'
    video_test_record = records_path + 'rgb36lips_test_sd_aus.tfrecord'
    labels_train_record = records_path + 'characters_train_sd.tfrecord'
    labels_test_record = records_path + 'characters_test_sd.tfrecord'

    audio_train_records = (
        records_path + 'logmel_train_sd_clean.tfrecord',
        records_path + 'logmel_train_sd_cafe_10db.tfrecord',
        records_path + 'logmel_train_sd_cafe_0db.tfrecord',
        records_path + 'logmel_train_sd_cafe_-5db.tfrecord',
    )
    audio_test_records = (
        records_path + 'logmel_test_sd_clean.tfrecord',
        records_path + 'logmel_test_sd_cafe_10db.tfrecord',
        records_path + 'logmel_test_sd_cafe_0db.tfrecord',
        records_path + 'logmel_test_sd_cafe_-5db.tfrecord',
    )

    # NOTE(review): the -5db entry has 0 as the first element of both its
    # iterations pair and its learning-rate pair; presumably this skips the
    # first phase for that condition -- confirm against run_experiment.
    iterations = (
        (200, 20),  # clean
        (100, 20),  # 10db
        (100, 20),  # 0db
        (0, 40),  # -5db
    )
    learning_rates = (
        (0.001, 0.0001),  # clean
        (0.001, 0.0001),  # 10db
        (0.001, 0.0001),  # 0db
        (0, 0.0001),  # -5db
    )

    logfile = 'tcd_audio_sd'

    run_experiment(
        video_train_record=video_train_record,
        video_test_record=video_test_record,
        labels_train_record=labels_train_record,
        labels_test_record=labels_test_record,
        audio_train_records=audio_train_records,
        audio_test_records=audio_test_records,
        iterations=iterations,
        learning_rates=learning_rates,
        architecture='unimodal',
        logfile=logfile,
    )
def main():
    """Launch the audio-only ``unimodal`` experiment logged as ``lrs2_audio``.

    Builds the label and log-mel audio TFRecord paths under ``./data/`` and
    forwards them to ``run_experiment``. No video records are passed. The
    audio tuples list four conditions in the order: clean, 10db, 0db, -5db;
    the schedule tuples carry one entry per condition, in the same order.
    """
    data_dir = './data/'

    # Order matters: clean, 10db, 0db, -5db.
    train_audio = (
        data_dir + 'logmel_train_success_clean.tfrecord',
        data_dir + 'logmel_train_success_cafe_10db.tfrecord',
        data_dir + 'logmel_train_success_cafe_0db.tfrecord',
        data_dir + 'logmel_train_success_cafe_-5db.tfrecord',
    )
    test_audio = (
        data_dir + 'logmel_test_success_clean.tfrecord',
        data_dir + 'logmel_test_success_cafe_10db.tfrecord',
        data_dir + 'logmel_test_success_cafe_0db.tfrecord',
        data_dir + 'logmel_test_success_cafe_-5db.tfrecord',
    )

    # Identical schedule entry and learning-rate pair for every condition.
    schedule = ((100, 20),) * len(train_audio)
    rates = ((0.001, 0.0001),) * len(train_audio)

    run_experiment(
        labels_train_record=data_dir + 'characters_train_success.tfrecord',
        labels_test_record=data_dir + 'characters_test_success.tfrecord',
        audio_train_records=train_audio,
        audio_test_records=test_audio,
        iterations=schedule,
        learning_rates=rates,
        architecture='unimodal',
        logfile='lrs2_audio',
        audio_processing='features',
    )
def main(argv):
    """Launch the audio-visual experiment with ``FLAGS.architecture`` forced
    to ``'av_transformer'``.

    Sets ``CUDA_VISIBLE_DEVICES`` from ``FLAGS.gpu_id``, overrides
    ``FLAGS.architecture``, then forwards the TFRecord paths under
    ``./data/`` and the per-condition schedule to ``run_experiment``.

    Args:
        argv: Accepted but not used inside this function.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_id
    FLAGS.architecture = 'av_transformer'

    data_dir = './data/'

    train_video = data_dir + 'rgb36lips_train_success_aus.tfrecord'
    test_video = data_dir + 'rgb36lips_test_success_aus.tfrecord'
    train_labels = data_dir + 'characters_train_success.tfrecord'
    test_labels = data_dir + 'characters_test_success.tfrecord'

    # Order matters: clean, 10db, 0db, -5db.
    train_audio = (
        data_dir + 'logmel_train_success_clean.tfrecord',
        data_dir + 'logmel_train_success_cafe_10db.tfrecord',
        data_dir + 'logmel_train_success_cafe_0db.tfrecord',
        data_dir + 'logmel_train_success_cafe_-5db.tfrecord',
    )
    test_audio = (
        data_dir + 'logmel_test_success_clean.tfrecord',
        data_dir + 'logmel_test_success_cafe_10db.tfrecord',
        data_dir + 'logmel_test_success_cafe_0db.tfrecord',
        data_dir + 'logmel_test_success_cafe_-5db.tfrecord',
    )

    # Each schedule entry carries the label of the condition it belongs to.
    condition_tags = ('clean', '10db', '0db', '-5db')
    schedule = tuple((100, 20, tag) for tag in condition_tags)
    rates = ((0.001, 0.0001),) * len(condition_tags)

    run_experiment(
        video_train_record=train_video,
        video_test_record=test_video,
        labels_train_record=train_labels,
        labels_test_record=test_labels,
        audio_train_records=train_audio,
        audio_test_records=test_audio,
        iterations=schedule,
        learning_rates=rates,
    )
def main(config, mode='train'):
    """Launch one experiment described by ``config``.

    TFRecord paths are built under ``N:/datasets/<dataset>/tfrecords/`` and
    the unit list under ``F:/Documents/datasets/<dataset>/misc/``. Only the
    audio condition named by ``config['snr']`` is used, so the audio,
    ``iterations`` and ``learning_rates`` tuples each hold exactly one entry.

    Args:
        config: Mapping read with ``config[key]``. Keys used here:
            ``dataset``, ``snr``, ``iterations``, ``learning_rate``,
            ``architecture``, ``experiment_path``, ``experiment_name``,
            ``cell_type``, ``encoder_units_per_layer``, ``cost_per_sample``,
            ``batch_size``, ``max_label_length``,
            ``decoder_units_per_layer``, ``write_summary``,
            ``set_data_null``.
        mode: Forwarded to ``run_experiment``. Any value other than
            ``'train'`` turns on ``write_eval_data``.
    """
    dataset = config['dataset']
    records_dir = 'N:/datasets/' + dataset + '/tfrecords/'

    train_video = records_dir + 'rgb36lips_train.tfrecord'
    train_test_video = records_dir + 'rgb36lips_trainTest.tfrecord'
    test_video = records_dir + 'rgb36lips_test.tfrecord'

    train_labels = records_dir + 'characters_train.tfrecord'
    train_test_labels = records_dir + 'characters_trainTest.tfrecord'
    test_labels = records_dir + 'characters_test.tfrecord'

    units_file = 'F:/Documents/datasets/' + dataset + '/misc/character_list'

    # A single noise condition per run. Earlier revisions also listed the
    # cafe_10db, cafe_0db and cafe_-5db records here (disabled in the original).
    noise = config['snr']
    train_audio = (records_dir + 'logmel_train_' + noise + '.tfrecord',)
    train_test_audio = (records_dir + 'logmel_trainTest_' + noise + '.tfrecord',)
    test_audio = (records_dir + 'logmel_test_' + noise + '.tfrecord',)

    # One schedule entry and one learning-rate entry for that single condition.
    schedule = (config['iterations'],)
    rates = (config['learning_rate'],)

    run_experiment(
        video_train_record=train_video,
        video_trainTest_record=train_test_video,
        video_test_record=test_video,
        labels_train_record=train_labels,
        labels_trainTest_record=train_test_labels,
        labels_test_record=test_labels,
        audio_train_records=train_audio,
        audio_trainTest_records=train_test_audio,
        audio_test_records=test_audio,
        iterations=schedule,
        learning_rates=rates,
        architecture=config['architecture'],
        logfile=config['experiment_path'] + config['experiment_name'],
        unit_list_file=units_file,
        cell_type=config['cell_type'],
        encoder_units_per_layer=config['encoder_units_per_layer'],
        cost_per_sample=config['cost_per_sample'],
        experiment_name=config['experiment_name'],
        experiment_path=config['experiment_path'],
        dataset_name=dataset,
        batch_size=config['batch_size'],
        write_attention_alignment=True,
        max_label_length=config['max_label_length'],
        decoder_units_per_layer=config['decoder_units_per_layer'],
        write_summary=config['write_summary'],
        # Evaluation data is written for every mode except training.
        write_eval_data=mode != 'train',
        set_data_null=config['set_data_null'],
        snr=noise,
        mode=mode,
    )