Example #1
0
                    help='dataset name')
# Parse CLI arguments (the argparse parser itself is built above this chunk).
args = parser.parse_args()

# Pick the dataset module from the CLI choice; each module exports the
# character vocabulary plus its Dataset class under the common alias
# `SpeechDataset` so the rest of the script is dataset-agnostic.
if args.dataset == 'ljspeech':
    from datasets.lj_speech import vocab, LJSpeech as SpeechDataset
elif args.dataset == 'emovdb':
    from datasets.emovdb import vocab, Emovdb as SpeechDataset
else:
    from datasets.mb_speech import vocab, MBSpeech as SpeechDataset

use_gpu = torch.cuda.is_available()
print('use_gpu', use_gpu)
if use_gpu:
    # Let cuDNN auto-tune kernels; pays off when input sizes are stable.
    torch.backends.cudnn.benchmark = True

# Train/validation loaders over (texts, mels, mel_gates) examples.
train_data_loader = Text2MelDataLoader(text2mel_dataset=SpeechDataset(
    ['texts', 'mels', 'mel_gates']),
                                       batch_size=64,
                                       mode='train')
valid_data_loader = Text2MelDataLoader(text2mel_dataset=SpeechDataset(
    ['texts', 'mels', 'mel_gates']),
                                       batch_size=64,
                                       mode='valid')

# BUG FIX: the original called .cuda() unconditionally, which raises on
# CPU-only machines even though use_gpu was computed above. Move the
# model to the GPU only when one is actually available.
text2mel = Text2Mel(vocab)
if use_gpu:
    text2mel = text2mel.cuda()

start_timestamp = int(time.time() * 1000)  # run start, ms since epoch
start_epoch = 0
global_step = 0

logger = Logger(args.dataset, 'text2mel')
                    required=True,
                    choices=['ljspeech', 'mbspeech'],
                    help='dataset name')
# Parse CLI arguments (the argparse parser itself is built above this chunk).
args = parser.parse_args()

# Pick the dataset module from the CLI choice; each exports its Dataset
# class under the common alias `SpeechDataset`.
if args.dataset == 'ljspeech':
    from datasets.lj_speech import LJSpeech as SpeechDataset
else:
    from datasets.mb_speech import MBSpeech as SpeechDataset

use_gpu = torch.cuda.is_available()
print('use_gpu', use_gpu)
if use_gpu:
    # Let cuDNN auto-tune kernels; pays off when input sizes are stable.
    torch.backends.cudnn.benchmark = True

# Train/validation loaders over (mags, mels) spectrogram pairs.
train_data_loader = SSRNDataLoader(ssrn_dataset=SpeechDataset(['mags',
                                                               'mels']),
                                   batch_size=24,
                                   mode='train')
valid_data_loader = SSRNDataLoader(ssrn_dataset=SpeechDataset(['mags',
                                                               'mels']),
                                   batch_size=24,
                                   mode='valid')

# BUG FIX: the original called .cuda() unconditionally, which raises on
# CPU-only machines even though use_gpu was computed above.
ssrn = SSRN()
if use_gpu:
    ssrn = ssrn.cuda()

# Adam over all SSRN parameters; learning rate comes from hyperparams (hp).
optimizer = torch.optim.Adam(ssrn.parameters(), lr=hp.ssrn_lr)

start_timestamp = int(time.time() * 1000)  # run start, ms since epoch
start_epoch = 0
global_step = 0
Example #3
0
elif args.dataset == 'mbspeech':
    from datasets.mb_speech import vocab, MBSpeech as SpeechDataset
elif args.dataset == 'swara':
    from datasets.swara import vocab, SWARA as SpeechDataset
elif args.dataset == 'swara_test':
    from datasets.swara_test import vocab, SWARA as SpeechDataset
else:
    print('No such dataset')
    sys.exit(1)

use_gpu = torch.cuda.is_available()
print('use_gpu', use_gpu)
if use_gpu:
    # Let cuDNN auto-tune kernels; pays off when input sizes are stable.
    torch.backends.cudnn.benchmark = True

# Train/validation loaders over (mags, mels) spectrogram pairs.
train_data_loader = SSRNDataLoader(ssrn_dataset=SpeechDataset(['mags', 'mels']), batch_size=24, mode='train')
valid_data_loader = SSRNDataLoader(ssrn_dataset=SpeechDataset(['mags', 'mels']), batch_size=24, mode='valid')

# BUG FIX: the original called .cuda() unconditionally, which raises on
# CPU-only machines even though use_gpu was computed above.
ssrn = SSRN()
if use_gpu:
    ssrn = ssrn.cuda()

# Adam over all SSRN parameters; learning rate comes from hyperparams (hp).
optimizer = torch.optim.Adam(ssrn.parameters(), lr=hp.ssrn_lr)

start_timestamp = int(time.time() * 1000)  # run start, ms since epoch
start_epoch = 0
global_step = 0

logger = Logger(args.dataset, 'ssrn')

# load the last checkpoint if exists
last_checkpoint_file_name = get_last_checkpoint_file_name(logger.logdir)
if last_checkpoint_file_name: