# Locations of the cached dataset lists (NumPy .npy files).
voxceleb_list = "Data/voxceleb.npy"
voxceleb_dev_list = "Data/voxceleb_dev.npy"
voxceleb_dev_10k_list = "Data/voxceleb_dev_10k.npy"

# Build the list from the data root when no cache file exists yet and save
# it for later runs; otherwise just reload the cached file.
if not os.path.isfile(voxceleb_list):
    voxceleb = read_my_voxceleb_structure(args.dataroot)
    np.save(voxceleb_list, voxceleb)
else:
    # NOTE(review): allow_pickle=True unpickles arbitrary objects, so the
    # cache file must come from a trusted source.
    voxceleb = np.load(voxceleb_list, allow_pickle=True)

# Generate the fbank feature for every listed file when --makemfb is set.
if args.makemfb:
    for datum in voxceleb:
        # The wav path is the data root, the entry's 'filename' and a
        # '.wav' suffix.
        mk_MFB("".join((args.dataroot, '/', datum['filename'], '.wav')))
    print("Complete convert")

# Create file loader for dataset.
# When args.mfb is set, both pipelines are composed from
# truncatedinputfromMFB followed by totensor, and file_loader is bound to
# read_MFB.
if args.mfb:
    # Pipeline using truncatedinputfromMFB's default arguments.
    transform = transforms.Compose([truncatedinputfromMFB(), totensor()])
    # Same steps, but input_per_file comes from args.test_input_per_file
    # (presumably the test-time pipeline, going by the argument name --
    # confirm where transform_T is consumed).
    transform_T = transforms.Compose([
        truncatedinputfromMFB(input_per_file=args.test_input_per_file),
        totensor()
    ])
    # Loader callable that goes with the MFB-based transforms above.
    file_loader = read_MFB
else:
    transform = transforms.Compose([
        truncatedinput(),
        toMFB(),
        totensor(),
# Logger writing under LOG_DIR.
logger = Logger(LOG_DIR)
# SummaryWriter instance used for visualisation.
writer = SummaryWriter('Log/amsoftmax_res10', comment='margin0.3')

# Extra keyword arguments that are only populated when running on CUDA
# (presumably forwarded to the data loaders -- confirm at the call site).
if args.cuda:
    kwargs = {'num_workers': 0, 'pin_memory': True}
else:
    kwargs = {}

# NOTE: despite its name, l2_dist is a cosine similarity when --cos_sim is
# set; otherwise it is PairwiseDistance(2).
l2_dist = (nn.CosineSimilarity(dim=1, eps=1e-6)
           if args.cos_sim else PairwiseDistance(2))

# Read the two lists returned by wav_list_reader for the data root.
voxceleb, voxceleb_dev = wav_list_reader(args.dataroot)

# Generate the MFB feature for every entry of `voxceleb` when --makemfb is set.
if args.makemfb:
    for datum in voxceleb:
        # Wav files are addressed under the 'voxceleb1_wav' sub-directory
        # of the data root.
        mk_MFB("".join(
            (args.dataroot, '/voxceleb1_wav/', datum['filename'], '.wav')))
    print("Complete convert")

# When args.mfb is set, both pipelines are composed from concateinputfromMFB
# followed by totensor, and file_loader is bound to read_MFB.
if args.mfb:
    # Pipeline using concateinputfromMFB's default arguments.
    transform = transforms.Compose([
        concateinputfromMFB(),
        # truncatedinputfromMFB(),
        totensor()
    ])
    # Same steps, but input_per_file comes from args.test_input_per_file
    # (presumably the test-time pipeline, going by the argument name --
    # confirm where transform_T is consumed).
    transform_T = transforms.Compose([
        concateinputfromMFB(input_per_file=args.test_input_per_file),
        # truncatedinputfromMFB(input_per_file=args.test_input_per_file),
        totensor()
    ])
    # Loader callable that goes with the MFB-based transforms above.
    file_loader = read_MFB
else:
Esempio n. 3
0
    os.makedirs(EXT_DIR)
# Logger writing under LOG_DIR.
logger = Logger(LOG_DIR)

# Extra keyword arguments that are only populated when running on CUDA
# (presumably forwarded to the data loaders -- confirm at the call site).
if args.cuda:
    kwargs = {'num_workers': 0, 'pin_memory': True}
else:
    kwargs = {}

# Distance object constructed with argument 2.
l2_dist = PairwiseDistance(2)


# Obtain the audio list from if_load_npy for `dataroot` / `data_set_list`.
# The earlier `audio_set = []` initialisation was a dead store (overwritten
# immediately), so it has been removed.
audio_set = if_load_npy(dataroot, data_set_list)

# Generate the MFB feature for every entry of `audio_set` when --makemfb is set.
if args.makemfb:
    for datum in audio_set:
        # Here 'filename' is used as-is (no data-root prefix); only the
        # '.wav' suffix is appended.
        mk_MFB("".join((datum['filename'], '.wav')))
    print("Complete convert")

# When args.mfb is set, build the two transform pipelines and bind
# file_loader to read_MFB.
# NOTE(review): the two pipelines differ in both steps -- `transform` uses
# concateinputfromMFB + to4tensor while `transform_T` uses
# truncatedinputfromMFB + totensor. Confirm this asymmetry is intended.
if args.mfb:
    # Pipeline using concateinputfromMFB's default arguments, then to4tensor.
    transform = transforms.Compose([
        concateinputfromMFB(),
        to4tensor()
        # truncatedinputfromMFB(),
        # totensor()
    ])
    # Pipeline whose input_per_file comes from args.test_input_per_file
    # (presumably the test-time pipeline, going by the argument name --
    # confirm where transform_T is consumed).
    transform_T = transforms.Compose([
        truncatedinputfromMFB(input_per_file=args.test_input_per_file),
        totensor()
    ])
    # Loader callable that goes with the MFB-based transforms above.
    file_loader = read_MFB
else: