# Dataset directories are given by the 5th command-line argument onwards.
dataset_list = sys.argv[5:]
dataset_num = len(dataset_list)
dis_phone_num = 4

# Load the data of every dataset directory in turn.
for dataset_indx, dataset_dir in enumerate(dataset_list):
    # Build the dist transform object for this training dataset.
    train_dist_path = os.path.join(dataset_dir, "subivector_dist.ark")
    dist_trans = dataset.Input_transform(
        train_dist_path,
        test_dist_path,
        dis_phone_num,
        prob_trans_path,
    )

    ds_path = os.path.join(dataset_dir, "subivector.scp")
    ds = dataset.SubivectorDataset(ds_path)
    utt_num = len(ds)
    print("The number of utters in %s is %d." % (ds_path, utt_num))

    # Both dimensions are read off the first entry of the dataset's data_list:
    # axis 1 gives subivector_dim, axis 0 gives phone_num.
    first_entry = ds.data_list[0]
    subivector_dim = np.size(first_entry, 1)
    phone_num = np.size(first_entry, 0)

    # Wrap the dataset in a data loader; shuffling is off, so batches follow
    # the dataset's own order.
    ds_loader = DataLoader(
        ds,
        batch_size=batchSize,
        shuffle=False,
        num_workers=1,
    )

    # set hook and store output in csvector
exit() # (Hyper parameters) batchSize = 128 # batchsize的大小 niter = 500 # epoch的最大值 # load data train_path = sys.argv[ 1] #'/scratch/njzheng/myprogram/phone_vectors5/subivector.scp' sre_test_path = sys.argv[ 2] #'/scratch/njzheng/myprogram/sre_test/subivector_test.scp' sre_train_path = sys.argv[ 3] #'/scratch/njzheng/myprogram/sre_train/subivector_test.scp' sre_path = sys.argv[4] #'/scratch/njzheng/myprogram/sre/subivector_test.scp' train_dataset = dataset.SubivectorDataset(train_path) sre_test_dataset = dataset.SubivectorDataset(sre_test_path) sre_train_dataset = dataset.SubivectorDataset(sre_train_path) sre_dataset = dataset.SubivectorDataset(sre_path) utt_num = train_dataset.__len__() train_len = int(0.9 * utt_num) valid_len = utt_num - train_len train, valid = torch.utils.data.random_split(train_dataset, lengths=[train_len, valid_len]) subivector_dim = np.size(train_dataset.data_list[0], 0) phone_num = np.size(train_dataset.data_list[0], 1) # load spk2int list uttid2int_dic_path = sys.argv[
phone_num = 43
subnet_out_dim = 50
out_dim_sqrt = torch.tensor(subnet_out_dim).float().sqrt()
subnet_list = []

# Need loop ==============================================================
# Visits phone indices 34 .. phone_num inclusive, one triplet scp file each.
for phone_indx in range(34, phone_num + 1):
    phone_tag = str(phone_indx)

    # Network file name: the third command-line argument suffixed with the
    # phone index (presumably something like $sub_train_path/net.pkl.<indx>).
    dnn_name = sys.argv[3] + "." + phone_tag

    # Per-phone triplet list under <train_path>/phone_vector/ (an earlier
    # variant read <train_path>/subivector.scp instead).
    scp_name = "triplet_data." + phone_tag + ".scp"
    train_scp_path = os.path.join(train_path, "phone_vector", scp_name)
    print('scp file is: %s' % (train_scp_path))

    train_dataset = dataset.SubivectorDataset(train_scp_path)

    # Input dimension is axis 1 of the first entry in data_list.
    subivector_dim = np.size(train_dataset.data_list[0], 1)
    in_dim_sqrt = torch.tensor(subivector_dim).float().sqrt()

    # 90% / 10% random split into training and validation subsets.
    train_utt_num = len(train_dataset)
    train_len = int(0.9 * train_utt_num)
    valid_len = train_utt_num - train_len
    train, valid = torch.utils.data.random_split(
        train_dataset,
        lengths=[train_len, valid_len],
    )

    print('The number of training and valid utters are %d %d' %
          (train_len, valid_len))
    print('The subivector_dim is %d, phone_indx is %d' %
          (subivector_dim, phone_indx))