Exemple #1
0
    def process_audio(self, wav_path, label_file, char_file, h5_file):
        """Extract a spectrum per utterance and cache features and labels in HDF5.

        Args:
            wav_path: text file with one whitespace-separated
                ``<utt> <audio path>`` pair per line.
            label_file: label file handed to ``process_label_file``.
            char_file: map file handed to ``process_map_file``.
            h5_file: destination HDF5 path; opened in ``'w'`` mode, so an
                existing file is overwritten.

        Side effects:
            Sets ``self.char_map``, ``self.int2phone`` and
            ``self.features_label`` (a list of ``(FloatTensor, label list)``
            tuples) and writes one HDF5 group per utterance holding a
            ``data`` and a ``label`` dataset.

        Raises:
            AssertionError: if the number of spectra differs from the number
                of label entries.
        """
        # read map file
        self.char_map, self.int2phone = process_map_file(char_file)

        # read the label file
        label_dict = process_label_file(label_file, self.out_type,
                                        self.char_map)

        # extract spectrum
        # NOTE(review): `audio_conf` and `windows` are not defined in this
        # method; they are presumably module-level settings - confirm.
        spec_dict = dict()
        # `with` guarantees the list file is closed even if parsing fails.
        with open(wav_path, 'r') as wav_list:
            for line in wav_list:
                utt, path = line.strip().split()
                spect = parse_audio(path,
                                    audio_conf,
                                    windows,
                                    normalize=self.normalize)
                spec_dict[utt] = spect.numpy()

        assert len(spec_dict) == len(label_dict)

        self.features_label = []
        # save the data as h5 file; the context manager closes (and therefore
        # flushes) the file, which the previous version never did.
        with h5py.File(h5_file, 'w') as h5:
            h5.create_dataset("phone_map_key", data=self.char_map.keys())
            h5.create_dataset("phone_map_value", data=self.char_map.values())
            for utt in spec_dict:
                grp = h5.create_group(utt)
                self.features_label.append(
                    (torch.FloatTensor(spec_dict[utt]),
                     label_dict[utt].tolist()))
                grp.create_dataset('data', data=spec_dict[utt])
                grp.create_dataset('label', data=label_dict[utt])
        print("Saved the %s data to h5py file" % self.data_set)
Exemple #2
0
    def process_txt(self, mfcc_file, label_file, char_file, h5_file):
        """Load text-format features and labels and cache them in an HDF5 file.

        Args:
            mfcc_file: feature file handed to ``process_kaldi_feat`` together
                with ``self.n_feats``.
            label_file: label file handed to ``process_label_file``.
            char_file: map file handed to ``process_map_file``.
            h5_file: destination HDF5 path; opened in ``'w'`` mode, so an
                existing file is overwritten.

        Side effects:
            Sets ``self.char_map``, ``self.int2phone`` and
            ``self.features_label`` (a list of ``(FloatTensor, label list)``
            tuples) and writes one HDF5 group per utterance holding a
            ``data`` and a ``label`` dataset. Calls ``sys.exit(1)`` when the
            feature and label counts differ.
        """
        # read map file
        self.char_map, self.int2phone = process_map_file(char_file)

        # read the label file
        label_dict = process_label_file(label_file, self.out_type,
                                        self.char_map)

        # read the mfcc file
        mfcc_dict = process_kaldi_feat(mfcc_file, self.n_feats)

        if len(mfcc_dict) != len(label_dict):
            print("%s data: The num of wav and text are not the same!" %
                  self.data_set)
            sys.exit(1)

        self.features_label = []
        # save the data as h5 file; the context manager closes (and therefore
        # flushes) the file, which the previous version never did.
        with h5py.File(h5_file, 'w') as h5:
            h5.create_dataset("phone_map_key", data=self.char_map.keys())
            h5.create_dataset("phone_map_value", data=self.char_map.values())
            for utt in mfcc_dict:
                # Build the array once and reuse it for both the in-memory
                # tensor and the on-disk dataset.
                feats = np.array(mfcc_dict[utt])
                grp = h5.create_group(utt)
                self.features_label.append(
                    (torch.FloatTensor(feats), label_dict[utt].tolist()))
                grp.create_dataset('data', data=feats)
                grp.create_dataset('label', data=label_dict[utt])
        print("Saved the %s data to h5py file" % self.data_set)
    def __init__(self,
                 data_dir,
                 data_set='train',
                 feature_type='spectrum',
                 out_type='phone',
                 n_feats=39,
                 mel=True):
        """Index the utterances of one data split together with their labels.

        Args:
            data_dir: root directory of the prepared data.
            data_set: sub-directory of ``data_dir`` holding this split.
            feature_type: feature name; ``<feature_type>.scp`` inside the
                split directory is used as the scp file. The value
                ``"waveform"`` takes a separate code path (see below).
            out_type: output unit name; selects the label file through
                ``out_map`` and the class list ``<out_type>_list.txt``.
            n_feats: accepted but not read by this method.
            mel: stored on the instance as ``self.mel``.
        """
        self.data_set = data_set
        self.out_type = out_type
        self.feature_type = feature_type
        self.mel = mel

        # Locations of the three input files.
        scp_file = os.path.join(data_dir, data_set, feature_type + '.scp')
        label_file = os.path.join(data_dir, data_set,
                                  out_map[out_type] + '_text')
        class_file = os.path.join(data_dir, out_type + '_list.txt')

        self.class2int, self.int2class = process_map_file(class_file)

        # Every feature type except raw waveform goes through the scp reader.
        if feature_type != "waveform":
            self.process_scp_label(scp_file, label_file)
            return

        self.label_dict = process_label_file(label_file, self.out_type,
                                             self.class2int)
        self.item = []
        # NOTE(review): `wav_path` is neither a parameter nor a local of this
        # method, so this raises NameError unless it is defined at module
        # level - confirm which file was intended here.
        with open(wav_path, 'r') as wav_list:
            for entry in wav_list:
                # Lines are tab-separated "<utt>\t<path>" pairs.
                utt, path = entry.strip().split('\t')
                self.item.append((path, self.label_dict[utt]))
Exemple #4
0
    def process_audio(self, wav_path, label_file):
        """Load the labels and collect the audio path of every listed utterance.

        Args:
            wav_path: text file with one whitespace-separated
                ``<utt> <audio path>`` pair per line.
            label_file: label file handed to ``process_label_file`` together
                with ``self.char2int``.

        Side effects:
            Sets ``self.label`` and ``self.path``.

        Raises:
            AssertionError: if the number of labels and paths differ.
        """
        # Labels first, so a broken label file fails before any paths are read.
        self.label = process_label_file(label_file, self.char2int)

        # Keep only the path column; the utterance id is not stored.
        self.path = []
        with open(wav_path, 'r') as wav_list:
            for entry in wav_list:
                _utt_id, audio_path = entry.strip().split()
                self.path.append(audio_path)

        # Inputs and labels must describe the same number of samples.
        n_labels, n_paths = len(self.label), len(self.path)
        assert n_labels == n_paths
Exemple #5
0
 def process_audio(self, wav_path, label_file):
     """Read the label file and the list of audio paths for this data set.

     Args:
         wav_path: text file with one whitespace-separated
             ``<utt> <audio path>`` pair per line.
         label_file: label file handed to ``process_label_file`` together
             with ``self.char2int``.

     Side effects:
         Sets ``self.label`` and ``self.path``.

     Raises:
         AssertionError: if the number of labels and paths differ.
     """
     # Labels are parsed before the path list is touched.
     self.label = process_label_file(label_file, self.char2int)

     # Second column of every line is the audio path; the first is ignored.
     self.path = []
     with open(wav_path, 'r') as path_list:
         for row in path_list:
             _utt_id, wav_file = row.strip().split()
             self.path.append(wav_file)

     # Inputs and labels must describe the same number of samples.
     label_count, path_count = len(self.label), len(self.path)
     assert label_count == path_count
Exemple #6
0
    def process_audio(self, wav_path, label_file, char_file, h5_file):
        """Extract spectra, optionally normalize them, and cache them in HDF5.

        Args:
            wav_path: text file with one whitespace-separated
                ``<utt> <audio path>`` pair per line.
            label_file: label file handed to ``process_label_file``.
            char_file: map file handed to ``process_map_file``.
            h5_file: destination HDF5 path; opened in ``'w'`` mode, so an
                existing file is overwritten.

        Side effects:
            Sets ``self.char_map``, ``self.int2phone`` and
            ``self.features_label`` (a list of ``(FloatTensor, label list)``
            tuples) and writes one HDF5 group per utterance holding a
            ``data`` and a ``label`` dataset. Calls ``sys.exit(1)`` when the
            spectrum and label counts differ.
        """
        # read map file
        self.char_map, self.int2phone = process_map_file(char_file)

        # read the label file
        label_dict = process_label_file(label_file, self.out_type,
                                        self.char_map)

        # extract spectrum
        spec_dict = dict()
        # `with` guarantees the list file is closed even if parsing fails.
        with open(wav_path, 'r') as wav_list:
            for line in wav_list:
                utt, path = line.strip().split()
                spec_dict[utt] = self.parse_audio(path).numpy()

        # Normalize with statistics taken along dim 0 of all spectra stacked
        # together. Skipped for an empty list, so the count check below
        # reports the problem instead of a failure here.
        if self.normalize and spec_dict:
            # One concatenation instead of growing the tensor per utterance.
            spec_all = torch.cat(
                [torch.FloatTensor(spec) for spec in spec_dict.values()], 0)
            # keepdim is passed by keyword so it cannot be mistaken for a
            # different positional argument.
            mean = torch.mean(spec_all, 0, keepdim=True)
            std = torch.std(spec_all, 0, keepdim=True)
            for utt in spec_dict:
                # Plain subtraction replaces the deprecated three-positional
                # torch.add(input, value, other) call.
                centered = torch.FloatTensor(spec_dict[utt]) - mean
                spec_dict[utt] = torch.div(centered, std).numpy()

        if len(spec_dict) != len(label_dict):
            print("%s data: The num of wav and text are not the same!" %
                  self.data_set)
            sys.exit(1)

        self.features_label = []
        # save the data as h5 file; the context manager closes (and therefore
        # flushes) the file, which the previous version never did.
        with h5py.File(h5_file, 'w') as h5:
            h5.create_dataset("phone_map_key", data=self.char_map.keys())
            h5.create_dataset("phone_map_value", data=self.char_map.values())
            for utt in spec_dict:
                grp = h5.create_group(utt)
                self.features_label.append(
                    (torch.FloatTensor(spec_dict[utt]),
                     label_dict[utt].tolist()))
                grp.create_dataset('data', data=spec_dict[utt])
                grp.create_dataset('label', data=label_dict[utt])
        print("Saved the %s data to h5py file" % self.data_set)
Exemple #7
0
    def process_scp_label(self, scp_file, label_file):
        """Pair every feature path listed in the scp file with its label.

        Args:
            scp_file: text file with one whitespace-separated
                ``<utt> <path>`` pair per line.
            label_file: label file handed to ``process_label_file`` together
                with ``self.out_type`` and ``self.class2int``.

        Side effects:
            Sets ``self.item`` to a list of ``(path, label)`` tuples.

        Raises:
            AssertionError: if the scp file and the label file do not hold
                the same number of utterances.
        """
        # Labels keyed by utterance id.
        label_dict = process_label_file(label_file, self.out_type,
                                        self.class2int)

        # utt -> path; a repeated utt keeps the path from its last line.
        with open(scp_file, 'r') as scp:
            path_dict = {
                utt: path
                for utt, path in (row.strip().split() for row in scp)
            }

        assert len(path_dict) == len(label_dict)

        self.item = []
        for utt, path in path_dict.items():
            self.item.append((path, label_dict[utt]))
Exemple #8
0
    def preprocessing(basedir, split_train=True, split_ratio=0.1):
        """
        Convert the raw MNIST idx files into torch ``.pt`` files.

        The raw files must already be present in `basedir` (they can be
        downloaded with the bash scripts in `run_all.sh`). If any of them is
        missing, a message is printed and nothing is written.

        Written into `basedir`: ``stats`` (mean and std of the training
        images, each divided by 255, saved with ``np.save``),
        ``training.pt``, ``testing.pt`` and, when `split_train` is set,
        ``validation_split.pt`` and ``training_split.pt``.

        Args:
            basedir (str): folder where the raw data files are located
            split_train (bool): if True, also split the training set into a
                training part and a validation part
            split_ratio (float): fraction of the training set that goes into
                the validation part
        """
        train_file_set = {
            'image': 'train-images-idx3-ubyte',
            'label': 'train-labels-idx1-ubyte'
        }
        test_file_set = {
            'image': 't10k-images-idx3-ubyte',
            'label': 't10k-labels-idx1-ubyte'
        }

        # process and save as torch files
        print('Processing...')

        # Bail out on the first missing raw file: training files are checked
        # first, then test files.
        required = list(train_file_set.values()) + list(test_file_set.values())
        for name in required:
            raw_path = os.path.join(basedir, name)
            if not os.path.exists(raw_path):
                print('%s does not exist. Check the dataset folder.' %
                      raw_path)
                return

        # Training labels and images.
        n_labels, raw_labels = process_label_file(
            os.path.join(basedir, train_file_set['label']))
        train_labels_pt = torch.from_numpy(raw_labels).view(n_labels).long()

        n_images, rows, cols, train_pixels = process_image_file(
            os.path.join(basedir, train_file_set['image']))
        train_image_pt = torch.from_numpy(train_pixels).view(
            n_images, rows, cols)

        # Statistics come from the training images only.
        mean_train = np.mean(train_pixels) / 255.0
        std_train = np.std(train_pixels) / 255.0

        # Test labels and images.
        n_labels, raw_labels = process_label_file(
            os.path.join(basedir, test_file_set['label']))
        test_labels_pt = torch.from_numpy(raw_labels).view(n_labels).long()

        n_images, rows, cols, test_pixels = process_image_file(
            os.path.join(basedir, test_file_set['image']))
        test_image_pt = torch.from_numpy(test_pixels).view(
            n_images, rows, cols)

        np.save(os.path.join(basedir, 'stats'), [mean_train, std_train])

        full_sets = (
            ('training.pt', (train_image_pt, train_labels_pt)),
            ('testing.pt', (test_image_pt, test_labels_pt)),
        )
        for file_name, payload in full_sets:
            with open(os.path.join(basedir, file_name), 'wb') as out:
                torch.save(payload, out)

        if split_train:
            print('Spliting training set...')
            n_train = len(train_labels_pt)
            idx = np.random.permutation(n_train)
            val_len = int(n_train * split_ratio)
            # The first `val_len` shuffled indices form the validation part.
            val_idx, train_idx = idx[:val_len], idx[val_len:]
            split_sets = (
                ('validation_split.pt',
                 (train_image_pt[val_idx], train_labels_pt[val_idx])),
                ('training_split.pt',
                 (train_image_pt[train_idx], train_labels_pt[train_idx])),
            )
            for file_name, payload in split_sets:
                with open(os.path.join(basedir, file_name), 'wb') as out:
                    torch.save(payload, out)

        print('Done!')