# Optimizer hyper-parameters, copied one-to-one from the parsed arguments.
opt_kwargs = dict(
    lr=args.lr,
    lr_decay=args.lr_decay,
    weight_decay=args.weight_decay,
    dampening=args.dampening,
    momentum=args.momentum,
)

# Despite its name, l2_dist holds a cosine-similarity module when
# args.cos_sim is set; otherwise it is a p=2 pairwise distance.
if args.cos_sim:
    l2_dist = nn.CosineSimilarity(dim=1, eps=1e-12)
else:
    l2_dist = nn.PairwiseDistance(p=2)

# Build the feature pipelines for the selected acoustic feature type.
if args.acoustic_feature != 'fbank':
    # Every non-'fbank' feature type uses the
    # truncatedinput -> toMFB -> totensor chain (tonormal() stays disabled)
    # and sets file_loader to read_audio. Only `transform` and `file_loader`
    # are assigned on this path.
    transform = transforms.Compose([truncatedinput(), toMFB(), totensor()])
    file_loader = read_audio
else:
    # 'fbank': three separate pipelines are created.
    transform = transforms.Compose([totensor()])
    transform_T = transforms.Compose(
        [
            concateinputfromMFB(
                num_frames=c.NUM_FRAMES_SPECT,
                input_per_file=args.test_input_per_file,
                remove_vad=args.remove_vad,
            )
        ]
    )
    # varLengthFeat / concateinputfromMFB were earlier candidates for this
    # pipeline; ConcateVarInput is the one in use.
    transform_V = transforms.Compose([ConcateVarInput(remove_vad=args.remove_vad)])

# NOTE(review): this fragment is not valid Python as it stands.
#   * The `if args.log_scale:` header below has no body of its own; the lines
#     that follow it look like the tail of a different statement whose
#     beginning is not visible here.
#   * Git conflict markers (`<<<<<<<`, `=======`, `>>>>>>>`) are still present,
#     so the conflict between TrainAndTest/test_vox1.py (HEAD) and
#     TrainAndTest/test_egs.py (Server/Server) has not been resolved.
# The code is left untouched; resolve the conflict before running.
if args.log_scale:
        ConcateOrgInput(remove_vad=args.remove_vad),
    ])
    transform_T = transforms.Compose([
        ConcateOrgInput(remove_vad=args.remove_vad),
>>>>>>> Server/Server:TrainAndTest/test_egs.py
    ])

# 'fix' input length: the two sides of the conflict disagree.
#   HEAD side            -> both pipelines built from concateinputfromMFB
#                           (transform_T additionally gets input_per_file).
#   Server/Server side   -> both pipelines built from ConcateVarInput with
#                           frame_shift=args.frame_shift.
# NOTE(review): keep exactly one side and delete the markers.
elif args.input_length == 'fix':
    transform = transforms.Compose([
<<<<<<< HEAD:TrainAndTest/test_vox1.py
        concateinputfromMFB(remove_vad=args.remove_vad),
    ])
    transform_T = transforms.Compose([
        concateinputfromMFB(input_per_file=args.test_input_per_file, remove_vad=args.remove_vad),
=======
        ConcateVarInput(frame_shift=args.frame_shift, remove_vad=args.remove_vad),
    ])
    transform_T = transforms.Compose([
        ConcateVarInput(frame_shift=args.frame_shift, remove_vad=args.remove_vad),
>>>>>>> Server/Server:TrainAndTest/test_egs.py
    ])
# Any value not handled by the branches above is rejected outright.
else:
    raise ValueError('input length must be var or fix.')

# When args.mvnorm is set, append an mvnormal() step to the end of both the
# `transform` and `transform_T` pipelines.
if args.mvnorm:
    transform.transforms.append(mvnormal())
    transform_T.transforms.append(mvnormal())

# pdb.set_trace()
# For 'kaldi' feature format file_loader is set to read_mat; for any other
# format this statement leaves file_loader as it was.
if args.feat_format == 'kaldi':
    file_loader = read_mat
# Optimizer hyper-parameters: each key is read from the attribute of the
# same name on the parsed arguments.
opt_kwargs = {
    name: getattr(args, name)
    for name in ('lr', 'lr_decay', 'weight_decay', 'dampening', 'momentum')
}

# l2_dist is a cosine-similarity module when args.cos_sim is set, and a
# PairwiseDistance(2) instance otherwise.
if args.cos_sim:
    l2_dist = nn.CosineSimilarity(dim=1, eps=1e-6)
else:
    l2_dist = PairwiseDistance(2)

# Build the feature pipelines for the selected acoustic feature type.
if args.acoustic_feature == 'fbank':
    # `transform`: concateinputfromMFB followed by to2tensor
    # (varLengthFeat was an earlier alternative for the first step).
    transform = transforms.Compose(
        [
            concateinputfromMFB(num_frames=c.NUM_FRAMES_SPECT, remove_vad=args.remove_vad),
            to2tensor(),
        ]
    )
    # `transform_T`: ConcateVarInput only; no to2tensor step is applied here.
    transform_T = transforms.Compose(
        [ConcateVarInput(num_frames=c.NUM_FRAMES_SPECT, remove_vad=args.remove_vad)]
    )
    # `transform_V`: varLengthFeat followed by to2tensor.
    transform_V = transforms.Compose(
        [varLengthFeat(remove_vad=args.remove_vad), to2tensor()]
    )
else:
    # Any other feature type: truncatedinput -> toMFB -> totensor
    # (tonormal() stays disabled), with file_loader set to read_audio.
    # transform_T and transform_V are not assigned on this path.
    transform = transforms.Compose([truncatedinput(), toMFB(), totensor()])
    file_loader = read_audio
# Example no. 4
# 0
# Seed both the NumPy and the PyTorch random number generators from args.seed.
np.random.seed(args.seed)
torch.manual_seed(args.seed)

# (Logger / visualization SummaryWriter setup is not part of this section.)

# On CUDA runs, switch on cudnn.benchmark and fill `kwargs` with the worker
# count and pinned-memory flag; on CPU runs `kwargs` is left empty.
if args.cuda:
    cudnn.benchmark = True
    kwargs = {'num_workers': 12, 'pin_memory': True}
else:
    kwargs = {}

# l2_dist is a cosine-similarity module when args.cos_sim is set, and a
# PairwiseDistance(2) instance otherwise.
if args.cos_sim:
    l2_dist = nn.CosineSimilarity(dim=1, eps=1e-6)
else:
    l2_dist = PairwiseDistance(2)

# Select the feature pipelines and the default file loader for the chosen
# acoustic feature type.
if args.acoustic_feature == 'fbank':
    # 'fbank': both pipelines consist of a single default-configured
    # ConcateVarInput step, and file_loader defaults to read_mat.
    transform = transforms.Compose([
        ConcateVarInput(),
    ])
    transform_T = transforms.Compose([
        ConcateVarInput(),
    ])
    file_loader = read_mat
else:
    # Any other feature type: truncatedinput -> toMFB -> totensor
    # (tonormal() stays disabled), with file_loader set to read_audio.
    # The original repeated this exact `transform` assignment a second time
    # after a dedented comment; comments do not end an indented block, so the
    # repeat was still part of this branch and merely rebuilt the same
    # pipeline. It is assigned once here.
    # NOTE(review): transform_T is not assigned on this path — confirm that
    # nothing later requires it for non-'fbank' features.
    transform = transforms.Compose([
        truncatedinput(),
        toMFB(),
        totensor(),
    ])
    file_loader = read_audio

# pdb.set_trace()

# Build `transform_V` according to args.test_input.
# NOTE(review): for any value other than 'var' or 'fix' transform_V is never
# assigned here — confirm the argument parser restricts the choices.
if args.test_input == 'var':
    transform_V = transforms.Compose(
        [ConcateOrgInput(remove_vad=args.remove_vad, feat_type=args.feat_format)]
    )
elif args.test_input == 'fix':
    # num_frames and frame_shift are both set to args.chunk_size.
    transform_V = transforms.Compose(
        [
            ConcateVarInput(
                remove_vad=args.remove_vad,
                num_frames=args.chunk_size,
                frame_shift=args.chunk_size,
                feat_type=args.feat_format,
            )
        ]
    )

# When args.log_scale is set, a tolog() step is appended to the end of both
# the `transform` and `transform_V` pipelines.
if args.log_scale:
    transform.transforms.append(tolog())
    transform_V.transforms.append(tolog())

# pdb.set_trace()
# Override file_loader by feature format; any format not listed below keeps
# whatever loader was chosen earlier.
if args.feat_format == 'kaldi':
    file_loader = read_mat
elif args.feat_format == 'npy':
    file_loader = np.load
elif args.feat_format == 'wav':
    # NOTE(review): `load_mat` differs from the `read_mat` / `read_audio`
    # loaders used elsewhere in this script — confirm this is the intended
    # name for 'wav' input.
    file_loader = load_mat