# Ensure the checkpoint directory exists. exist_ok avoids the
# exists()/makedirs() race when several processes start concurrently.
os.makedirs(args.check_path, exist_ok=True)

# Keyword arguments forwarded verbatim to the optimizer constructor.
opt_kwargs = dict(
    lr=args.lr,
    lr_decay=args.lr_decay,
    weight_decay=args.weight_decay,
    dampening=args.dampening,
    momentum=args.momentum,
)

# Verification scoring function: cosine similarity when requested,
# otherwise Euclidean (p=2) pairwise distance.
if args.cos_sim:
    l2_dist = nn.CosineSimilarity(dim=1, eps=1e-12)
else:
    l2_dist = nn.PairwiseDistance(p=2)

if args.acoustic_feature == 'fbank':
    # Train-time pipeline: tensor conversion only.
    transform = transforms.Compose([totensor()])
    # Test-time pipeline: stack several fixed-length chunks per file.
    test_steps = [
        concateinputfromMFB(num_frames=c.NUM_FRAMES_SPECT,
                            input_per_file=args.test_input_per_file,
                            remove_vad=args.remove_vad),
    ]
    transform_T = transforms.Compose(test_steps)
    # Validation pipeline: keep the full variable-length feature matrix.
    transform_V = transforms.Compose(
        [varLengthFeat(remove_vad=args.remove_vad)])

# NOTE(review): truncated fragment — the Compose list opened below is
# never closed, and the second `else:` has no matching `if` in view.
# This looks like a scrape/merge artifact; confirm against the original
# source before relying on this branch.
else:
    transform = transforms.Compose([
        truncatedinput(),
        toMFB(),
        totensor(),
        # tonormal()
else:
    l2_dist = PairwiseDistance(2)

# Load the VoxCeleb wav list; the second value is the dev split.
voxceleb, voxceleb_dev = wav_list_reader(args.dataroot)
if args.makemfb:
    # One-off conversion of every listed wav into filter-bank features.
    for entry in voxceleb:
        wav_path = '%s/voxceleb1_wav/%s.wav' % (args.dataroot,
                                                entry['filename'])
        mk_MFB(wav_path)
    print("Complete convert")

if args.mfb:
    # Pre-computed filter-bank features: concatenate chunks, then to tensor.
    transform = transforms.Compose([concateinputfromMFB(), totensor()])
    transform_T = transforms.Compose([
        concateinputfromMFB(input_per_file=args.test_input_per_file),
        totensor(),
    ])
    file_loader = read_MFB
else:
    # Raw-audio path: truncate, compute MFB on the fly, then to tensor.
    transform = transforms.Compose([truncatedinput(), toMFB(), totensor()])
    file_loader = read_audio
# Experiment logger writing under LOG_DIR.
logger = Logger(LOG_DIR)

# DataLoader worker/pinning options only apply when CUDA is enabled.
if args.cuda:
    kwargs = {'num_workers': 0, 'pin_memory': True}
else:
    kwargs = {}
l2_dist = PairwiseDistance(2)

# voxceleb = read_my_voxceleb_structure(args.dataroot)
# if args.makemfb:
#pbar = tqdm(voxceleb)
# for datum in voxceleb:
#     # print(datum['filename'])
#     mk_MFB((args.dataroot +'/voxceleb1_wav/' + datum['filename']+'.wav'))
# print("Complete convert")

if args.mfb:
    # Fixed-length truncated MFB features for both train and test.
    transform = transforms.Compose([truncatedinputfromMFB(), totensor()])
    test_steps = [
        truncatedinputfromMFB(input_per_file=args.test_input_per_file),
        totensor(),
    ]
    transform_T = transforms.Compose(test_steps)
    file_loader = read_MFB
else:
    # Compute MFB from raw audio on the fly.
    transform = transforms.Compose([truncatedinput(), toMFB(), totensor()])
    file_loader = read_audio

# voxceleb_dev = [datum for datum in voxceleb if datum['subset']=='dev']
# --- Example #4 (snippet separator left over from the source scrape; not code) ---
#     print("Complete convert")
#
# if args.makespec:
#     num_pro = 1.
#     for datum in voxceleb:
#         # Data/voxceleb1/
#         # /data/voxceleb/voxceleb1_wav/
#         GenerateSpect(wav_path='/data/voxceleb/voxceleb1_wav/' + datum['filename']+'.wav',
#                       write_path=args.dataroot +'/spectrogram/voxceleb1_wav/' + datum['filename']+'.npy')
#         print('\rprocessed {:2f}% {}/{}.'.format(num_pro/len(voxceleb), num_pro, len(voxceleb)), end='\r')
#         num_pro += 1
#     print('\nComputing Spectrograms success!')
#     exit(1)

if args.acoustic_feature == 'fbank':
    # Filter-bank features loaded from disk; chunks are concatenated.
    transform = transforms.Compose([concateinputfromMFB(), totensor()])
    transform_T = transforms.Compose([
        concateinputfromMFB(input_per_file=args.test_input_per_file),
        totensor(),
    ])
    file_loader = read_MFB
else:
    # Fall back to raw-audio loading with on-the-fly MFB extraction.
    transform = transforms.Compose([truncatedinput(), toMFB(), totensor()])
    file_loader = read_audio
# --- Example #5 (snippet separator left over from the source scrape; not code) ---
        # NOTE(review): truncated fragment — the `transforms.Compose([`
        # that opens this list (and the matching `if` branch) were lost
        # above; the `else:` below pairs with that missing branch.
        # Confirm against the original source before use.
        # concateinputfromMFB(num_frames=c.NUM_FRAMES_SPECT, remove_vad=True),
        varLengthFeat(remove_vad=args.remove_vad),
        to2tensor(),
        tonormal()
    ])
    transform_T = transforms.Compose([
        # concateinputfromMFB(num_frames=c.NUM_FRAMES_SPECT, input_per_file=args.test_input_per_file, remove_vad=True),
        varLengthFeat(remove_vad=args.remove_vad),
        to2tensor(),
        tonormal()
    ])

else:
    transform = transforms.Compose(
        [truncatedinput(), toMFB(),
         totensor(), tonormal()])
    file_loader = read_audio

# pdb.set_trace()
# Features are read as Kaldi-format matrices.
file_loader = read_mat
# Training set: `samples_per_speaker` and `num_valid` are forwarded
# from the CLI; the same per-sample `transform` is applied to each item.
train_dir = ScriptTrainDataset(dir=args.train_dir,
                               samples_per_speaker=args.input_per_spks,
                               loader=file_loader,
                               transform=transform,
                               num_valid=args.num_valid)
# Verification test set uses the test-time transform.
test_dir = ScriptTestDataset(dir=args.test_dir,
                             loader=file_loader,
                             transform=transform_T)

# Never request more verification pairs than the test set can supply.
args.veri_pairs = min(args.veri_pairs, len(test_dir))
# --- Example #6 (snippet separator left over from the source scrape; not code) ---
# Seed both RNGs so runs are reproducible.
np.random.seed(args.seed)
torch.manual_seed(args.seed)

if args.cuda:
    # Let cuDNN autotune the fastest kernels for fixed-size inputs.
    cudnn.benchmark = True

# DataLoader options (workers + pinned memory) only apply on CUDA.
kwargs = {'num_workers': 12, 'pin_memory': True} if args.cuda else {}

# Verification scoring: cosine similarity or L2 pairwise distance.
if args.cos_sim:
    l2_dist = nn.CosineSimilarity(dim=1, eps=1e-6)
else:
    l2_dist = PairwiseDistance(2)

if args.acoustic_feature == 'fbank':
    # Variable-length fbank features read from Kaldi matrices; train
    # and test share the same pipeline shape but are separate objects.
    transform = transforms.Compose([varLengthFeat(), totensor()])
    transform_T = transforms.Compose([varLengthFeat(), totensor()])
    file_loader = read_mat
else:
    # Raw audio with on-the-fly MFB extraction.
    raw_steps = [truncatedinput(), toMFB(), totensor()]
    transform = transforms.Compose(raw_steps)
    file_loader = read_audio

# pdb.set_trace()
# Extraction dataset over the training directory; applies the
# train-time `transform` to each loaded item.
train_dir = KaldiExtractDataset(dir=args.train_dir,
                                loader=file_loader,
                                transform=transform)
# --- Example #7 (snippet separator left over from the source scrape; not code) ---
    # NOTE(review): truncated fragment — the opening line of this dict
    # (presumably `opt_kwargs = {` with an 'lr' entry, matching the
    # copy earlier in the file) was lost above; confirm against the
    # original source.
    'lr_decay': args.lr_decay,
    'weight_decay': args.weight_decay,
    'dampening': args.dampening,
    'momentum': args.momentum
}

# Verification scoring: cosine similarity, or Euclidean distance by default.
if args.cos_sim:
    l2_dist = nn.CosineSimilarity(dim=1, eps=1e-12)
else:
    l2_dist = nn.PairwiseDistance(p=2)

if args.acoustic_feature == 'fbank':
    # Draw `input_per_spks` fixed-size chunks per file, then to tensor.
    chunker = ConcateNumInput(input_per_file=args.input_per_spks,
                              num_frames=args.chunk_size,
                              feat_type=args.feat_format,
                              remove_vad=args.remove_vad)
    transform = transforms.Compose([chunker, totensor()])

# Validation transform: 'var' keeps the whole variable-length utterance;
# 'fix' cuts fixed chunks (frame_shift equals num_frames here).
if args.test_input == 'var':
    transform_V = transforms.Compose([
        ConcateOrgInput(remove_vad=args.remove_vad,
                        feat_type=args.feat_format),
    ])
elif args.test_input == 'fix':
    fixed_cut = ConcateVarInput(remove_vad=args.remove_vad,
                                num_frames=args.chunk_size,
                                frame_shift=args.chunk_size,
                                feat_type=args.feat_format)
    transform_V = transforms.Compose([fixed_cut])