Example #1
0
    def __init__(self, params, datadict, is_eval=False):
        """Index transcripts, feature paths and optional CMVN tables.

        Args:
            params: Mapping read for ``'spec_augment'`` and ``'vocab'`` and,
                when present, ``'max_length'`` and ``'spec_augment_config'``.
            datadict: Mapping holding iterables of file paths under ``'text'``
                and ``'feat'``. When it also has ``'utt2spk'`` it must have
                ``'cmvn'`` too.
            is_eval: When true, SpecAugment is switched off and the target
                length limit is 1000 regardless of ``params['max_length']``.

        Raises:
            AssertionError: If ``'utt2spk'`` is given without ``'cmvn'``.
        """
        self.params = params
        self.datadict = datadict
        self.is_eval = is_eval

        if 'max_length' in params and not is_eval:
            self.max_length = params['max_length']
        else:
            self.max_length = 1000

        self.apply_spec_augment = bool(params['spec_augment'] and not self.is_eval)

        if self.apply_spec_augment:
            logging.info('Apply SpecAugment!')
            self.spec_augment_config = params['spec_augment_config']
            logging.info('Config: %s' % ' '.join(
                [key + ':' + str(value) for key, value in self.spec_augment_config.items()]))

        self.unit2idx = load_vocab(params['vocab'])

        # utt_id -> encoded transcript; over-long transcripts are dropped.
        self.target_dict = {}
        for text_file in self.datadict['text']:
            with open(text_file, 'r', encoding='utf-8') as f:
                for line in f:
                    parts = line.split()
                    if not parts:
                        # Blank line: nothing to index (used to raise IndexError).
                        continue
                    utt_id = parts[0]
                    tokens = self.encode(parts[1:])
                    if len(tokens) > self.max_length:
                        continue
                    self.target_dict[utt_id] = tokens

        # Keep only features whose utterance survived the transcript filter.
        self.feat_list = []
        for feat_file in self.datadict['feat']:
            with open(feat_file, 'r', encoding='utf-8') as f:
                for line in f:
                    fields = line.split()
                    if not fields:
                        continue
                    utt_id, feat_path = fields
                    if utt_id not in self.target_dict:
                        continue
                    self.feat_list.append([utt_id, feat_path])

        if 'utt2spk' in self.datadict:
            self.apply_cmvn = True
            assert 'cmvn' in self.datadict

            # Same encoding as the text/feat index files above.
            self.utt2spk = {}
            for utt2spk_file in self.datadict['utt2spk']:
                with open(utt2spk_file, 'r', encoding='utf-8') as f:
                    for line in f:
                        fields = line.split()
                        if not fields:
                            continue
                        uttid, spkid = fields
                        self.utt2spk[uttid] = spkid

            self.cmvn = {}
            for cmvn_file in self.datadict['cmvn']:
                with open(cmvn_file, 'r', encoding='utf-8') as f:
                    for line in f:
                        fields = line.split()
                        if not fields:
                            continue
                        spkid, path = fields
                        self.cmvn[spkid] = path
            logging.info('Apply CMVN!')
        else:
            self.apply_cmvn = False
Example #2
0
def main(args):
    """Decode one audio file with a trained Transformer and print the result.

    Args:
        args: Parsed command-line namespace. Reads ``load_model``, ``config``,
            ``batch_size``, ``ngpu``, ``beam_width``, ``max_len``, ``penalty``,
            ``lamda`` and ``file``.
    """
    # Load onto the CPU first so a checkpoint saved from a GPU can also be
    # decoded on a CPU-only host; the model is moved to the GPU below if asked.
    checkpoint = torch.load(args.load_model, map_location='cpu')
    if 'params' in checkpoint:
        params = checkpoint['params']
    else:
        assert os.path.isfile(args.config), 'please specify a configure file.'
        with open(args.config, 'r') as f:
            # yaml.load() without a Loader is rejected by PyYAML >= 6;
            # safe_load parses plain configuration files on every version.
            params = yaml.safe_load(f)

    params['data']['shuffle'] = False
    # Was misspelled 'spec_argument', which left the 'spec_augment' switch
    # read by the dataset constructors untouched.
    params['data']['spec_augment'] = False
    params['data']['short_first'] = False
    params['data']['batch_size'] = args.batch_size

    model = Transformer(params['model'])

    model.load_state_dict(checkpoint['model'])
    print('Load pre-trained model from %s' % args.load_model)

    model.eval()
    if args.ngpu > 0:
        model.cuda()

    # Invert the vocabulary so decoded indices can be mapped back to units.
    char2unit = load_vocab(params['data']['vocab'])
    unit2char = {i: c for c, i in char2unit.items()}

    recognizer = TransformerRecognizer(model,
                                       unit2char=unit2char,
                                       beam_width=args.beam_width,
                                       max_len=args.max_len,
                                       penalty=args.penalty,
                                       lamda=args.lamda,
                                       ngpu=args.ngpu)

    # inputs_length: [len]
    inputs, inputs_length = calc_fbank(args.file, params['data'])
    if args.ngpu > 0:
        inputs = inputs.cuda()
        inputs_length = inputs_length.cuda()

    preds = recognizer.recognize(inputs, inputs_length)
    # The hypothesis is printed with the separating spaces removed.
    print('preds: {}'.format(preds[0].replace(' ', '')))
Example #3
0
    def __init__(self, params, datadict, is_eval=False):
        """Store the configuration and load the utterance list from JSON.

        Args:
            params: Mapping read for ``'spec_augment'``, ``'vocab'`` and, when
                SpecAugment is active, ``'spec_augment_config'``.
            datadict: Mapping whose ``'json'`` entry is the path of a JSON file
                with a top-level ``'utts'`` object.
            is_eval: When true, SpecAugment is never applied.
        """
        self.params = params
        self.datadict = datadict
        self.is_eval = is_eval

        # SpecAugment only when requested and not evaluating.
        augment = params['spec_augment'] and not self.is_eval
        self.apply_spec_augment = bool(augment)

        if self.apply_spec_augment:
            logging.info('Apply SpecAugment!')
            self.spec_augment_config = params['spec_augment_config']
            pairs = []
            for name, setting in self.spec_augment_config.items():
                pairs.append(name + ':' + str(setting))
            logging.info('Config: %s' % ' '.join(pairs))

        self.unit2idx = load_vocab(params['vocab'])

        with open(self.datadict['json'], 'r') as handle:
            utterances = json.load(handle)['utts']
        # List of (key, value) pairs in the order the JSON object yields them.
        self.utts = list(utterances.items())
Example #4
0
def main(args):
    """Decode a data set with a trained Transformer and save the hypotheses.

    One ``'<utt_id> <prediction>'`` line per utterance is written to
    ``predict.txt`` inside the decode directory, and both the prediction and
    the reference are printed.

    Args:
        args: Parsed command-line namespace. Reads ``load_model``, ``config``,
            ``batch_size``, ``decode_set``, ``suffix``, ``ngpu``,
            ``beam_width``, ``max_len``, ``penalty`` and ``lamda``.
    """
    # Load onto the CPU first so a checkpoint saved from a GPU can also be
    # decoded on a CPU-only host; the model is moved to the GPU below if asked.
    checkpoint = torch.load(args.load_model, map_location='cpu')
    if 'params' in checkpoint:
        params = checkpoint['params']
    else:
        assert os.path.isfile(args.config), 'please specify a configure file.'
        with open(args.config, 'r') as f:
            # yaml.load() without a Loader is rejected by PyYAML >= 6;
            # safe_load parses plain configuration files on every version.
            params = yaml.safe_load(f)

    params['data']['shuffle'] = False
    # Was misspelled 'spec_argument', which left the 'spec_augment' switch
    # read by the dataset constructors untouched.
    params['data']['spec_augment'] = False
    params['data']['short_first'] = False
    params['data']['batch_size'] = args.batch_size

    expdir = os.path.join('egs', params['data']['name'], 'exp',
                          params['train']['save_name'])
    if args.suffix is None:
        decode_dir = os.path.join(expdir, 'decode_%s' % args.decode_set)
    else:
        decode_dir = os.path.join(
            expdir, 'decode_%s_%s' % (args.decode_set, args.suffix))

    os.makedirs(decode_dir, exist_ok=True)

    model = Transformer(params['model'])

    model.load_state_dict(checkpoint['model'])
    print('Load pre-trained model from %s' % args.load_model)

    model.eval()
    if args.ngpu > 0:
        model.cuda()

    # Invert the vocabulary so indices can be mapped back to units.
    char2unit = load_vocab(params['data']['vocab'])
    unit2char = {i: c for c, i in char2unit.items()}

    dataset = AudioDataset(params['data'], args.decode_set)
    data_loader = FeatureLoader(dataset)

    recognizer = TransformerRecognizer(model,
                                       unit2char=unit2char,
                                       beam_width=args.beam_width,
                                       max_len=args.max_len,
                                       penalty=args.penalty,
                                       lamda=args.lamda,
                                       ngpu=args.ngpu)

    totals = len(dataset)
    batch_size = params['data']['batch_size']
    # The context manager closes the file even if decoding raises midway.
    with open(os.path.join(decode_dir, 'predict.txt'), 'w') as writer:
        for step, (utt_id, batch) in enumerate(data_loader.loader):

            # Bind the inputs unconditionally; previously they were only
            # defined on the GPU path, so CPU decoding hit a NameError.
            inputs = batch['inputs']
            inputs_length = batch['inputs_length']
            if args.ngpu > 0:
                inputs = inputs.cuda()
                inputs_length = inputs_length.cuda()

            preds = recognizer.recognize(inputs, inputs_length)

            targets = batch['targets']
            targets_length = batch['targets_length']

            for b, pred in enumerate(preds):
                # Running index of the utterance, used only for display.
                n = step * batch_size + b
                truth = ' '.join(
                    [unit2char[i.item()] for i in targets[b][1:targets_length[b]]])
                print('[%d / %d ] %s - pred : %s' %
                      (n, totals, utt_id[b], pred))
                print('[%d / %d ] %s - truth: %s' % (n, totals, utt_id[b], truth))
                writer.write(utt_id[b] + ' ' + pred + '\n')
Example #5
0
    def __init__(self, params, datadict, is_eval=False):
        """Configure on-the-fly feature extraction and index the data files.

        Args:
            params: Mapping read for ``'spec_augment'``, ``'normalization'``
                and ``'vocab'`` and, when present, ``'feature_extractor'``,
                ``'global_cmvn'``, ``'spec_augment_config'``,
                ``'gaussian_noise'`` and ``'volume_perturb'``.
            datadict: Mapping holding iterables of file paths under ``'text'``
                and ``'feat'``.
            is_eval: When true, SpecAugment, Gaussian noise and volume
                perturbation are all disabled.

        Raises:
            AssertionError: If the feature extractor name is not supported, or
                if more feature entries than transcripts were read.
        """
        self.params = params
        self.datadict = datadict
        self.is_eval = is_eval
        self.apply_spec_augment = params['spec_augment'] if not self.is_eval else False

        logger.info('[Online-Reader] Read the feature extracted online!')

        self.normalization = params['normalization']
        self.feature_extractor = params['feature_extractor'] if 'feature_extractor' in params else 'torchaudio'
        assert self.feature_extractor in ['torchaudio', 'python_speech_feature', 'ta', 'psf']
        logger.info('Utilize %s to extract feature from wav.' % self.feature_extractor)

        # Define the flag on every path; it used to be missing entirely when
        # normalization was switched off.
        self.apply_global_cmvn = False
        if self.normalization:
            logger.info('Apply Feature Normalization!')
            if 'global_cmvn' in params:
                # params['global_cmvn'] is a path prefix shared by both files.
                self.global_mean = torch.from_numpy(np.load(params['global_cmvn'] + '.mean.npy'))
                self.global_std = torch.from_numpy(np.load(params['global_cmvn'] + '.std.npy'))
                logger.info('Load global mean and std vector from files')
                self.apply_global_cmvn = True

        # apply_spec_augment is already False in eval mode.
        if self.apply_spec_augment:
            self.spec_augment_config = params['spec_augment_config']
            logger.info('Apply SpecAugment!')
            logger.info('Config: %s' % ' '.join(
                [key + ':' + str(value) for key, value in self.spec_augment_config.items()]))

        if 'gaussian_noise' in params and not self.is_eval:
            self.gaussian_noise = params['gaussian_noise']
            if self.gaussian_noise > 0.0:
                logger.info('Apply Gaussian Noise with std: %f.' % self.gaussian_noise)
        else:
            self.gaussian_noise = 0.0

        # NOTE: speed perturbation is currently disabled here; no
        # apply_speed_perturb attribute is set by this constructor.

        if 'volume_perturb' in params and not self.is_eval:
            self.apply_volume_perturb = params['volume_perturb']
            if self.apply_volume_perturb:
                logger.info('Apply Volume Perturb during the training!')
        else:
            self.apply_volume_perturb = False

        self.unit2idx = load_vocab(params['vocab'])

        # utt_id -> list of vocabulary indices; unknown units map to UNK_TOKEN.
        self.targets_dict = {}
        for text_file in self.datadict['text']:
            with open(text_file, 'r', encoding='utf-8') as t:
                for line in t:
                    parts = line.split()
                    if not parts:
                        # Blank line: nothing to index (used to raise IndexError).
                        continue
                    utt_id = parts[0]
                    self.targets_dict[utt_id] = [
                        self.unit2idx[c if c in self.unit2idx else UNK_TOKEN]
                        for c in parts[1:]
                    ]

        self.file_list = []
        for feat_file in self.datadict['feat']:
            with open(feat_file, 'r', encoding='utf-8') as fid:
                for line in fid:
                    fields = line.split()
                    if not fields:
                        continue
                    idx, path = fields
                    self.file_list.append([idx, path])

        assert len(self.file_list) <= len(self.targets_dict)
0
def _utt_group_tag(utt_id):
    """Return the label placed before ``-<utt_id>`` in the output files.

    The rules are checked in this order: character 7 equal to ``'1'`` gives
    ``'S1000'``, equal to ``'2'`` gives ``'S2000'``, an id starting with
    ``'O'`` gives ``'O'``, and anything else gives characters 6 to 10 of the
    id. Slicing is used so that ids shorter than eight characters fall through
    to the later rules instead of raising IndexError.

    NOTE(review): the label presumably identifies a speaker or recording
    group for the scoring tool -- confirm against the corpus id format.
    """
    marker = utt_id[7:8]
    if marker == '1':
        return 'S1000'
    if marker == '2':
        return 'S2000'
    if utt_id.startswith('O'):
        return 'O'
    return utt_id[6:11]


def main(args):
    """Decode a data set, optionally with a language model, and save results.

    For every utterance a ``'<text> (<label>-<utt_id>)'`` line is written to
    ``predict.txt`` (hypothesis) and ``reference.txt`` (ground truth) inside
    the decode directory; both texts are also printed.

    Args:
        args: Parsed command-line namespace. Reads ``load_model``, ``config``,
            ``batch_size``, ``decode_set``, ``suffix``, ``load_language_model``,
            ``lm_weight``, ``ngpu``, ``beam_width``, ``max_len``, ``penalty``
            and ``lamda``.
    """
    # Load onto the CPU first so a checkpoint saved from a GPU can also be
    # decoded on a CPU-only host; models are moved to the GPU below if asked.
    checkpoint = torch.load(args.load_model, map_location='cpu')
    if 'params' in checkpoint:
        params = checkpoint['params']
    else:
        assert os.path.isfile(args.config), 'please specify a configure file.'
        with open(args.config, 'r') as f:
            # yaml.load() without a Loader is rejected by PyYAML >= 6;
            # safe_load parses plain configuration files on every version.
            params = yaml.safe_load(f)

    params['data']['shuffle'] = False
    params['data']['spec_augment'] = False
    params['data']['short_first'] = False
    params['data']['batch_size'] = args.batch_size

    expdir = os.path.join('egs', params['data']['name'], 'exp',
                          params['train']['save_name'])
    # The directory name records the decode set, LM weight and optional suffix.
    decoder_set_name = 'decode_%s' % args.decode_set
    if args.load_language_model is not None:
        decoder_set_name += '_lm_lmw%.2f' % args.lm_weight
    if args.suffix is not None:
        decoder_set_name += '_%s' % args.suffix

    decode_dir = os.path.join(expdir, decoder_set_name)
    os.makedirs(decode_dir, exist_ok=True)

    model = Transformer(params['model'])

    model.load_state_dict(checkpoint['model'])
    print('Load pre-trained model from %s' % args.load_model)

    model.eval()
    if args.ngpu > 0:
        model.cuda()

    if args.load_language_model is not None:
        lm_chkpt = torch.load(args.load_language_model, map_location='cpu')
        lm = TransformerLanguageModel(lm_chkpt['params']['model'])
        lm.load_state_dict(lm_chkpt['model'])
        lm.eval()
        if args.ngpu > 0:
            lm.cuda()
        print('Load pre-trained transformer language model from %s' %
              args.load_language_model)
    else:
        lm = None

    # Invert the vocabulary so indices can be mapped back to units.
    char2unit = load_vocab(params['data']['vocab'])
    unit2char = {i: c for c, i in char2unit.items()}

    data_loader = FeatureLoader(params, args.decode_set, is_eval=True)

    recognizer = TransformerRecognizer(model,
                                       lm=lm,
                                       lm_weight=args.lm_weight,
                                       unit2char=unit2char,
                                       beam_width=args.beam_width,
                                       max_len=args.max_len,
                                       penalty=args.penalty,
                                       lamda=args.lamda,
                                       ngpu=args.ngpu)

    totals = len(data_loader.dataset)
    batch_size = params['data']['batch_size']
    predict_path = os.path.join(decode_dir, 'predict.txt')
    reference_path = os.path.join(decode_dir, 'reference.txt')
    # Context managers close both files even if decoding raises midway.
    with open(predict_path, 'w') as writer, open(reference_path, 'w') as writerRef:
        for step, (utt_id, batch) in enumerate(data_loader.loader):

            inputs = batch['inputs']
            inputs_length = batch['inputs_length']
            if args.ngpu > 0:
                inputs = inputs.cuda()
                inputs_length = inputs_length.cuda()

            preds = recognizer.recognize(inputs, inputs_length)

            targets = batch['targets']
            targets_length = batch['targets_length']

            for b, pred in enumerate(preds):
                # Running index of the utterance, used only for display.
                n = step * batch_size + b
                truth = ' '.join([
                    unit2char[i.item()]
                    for i in targets[b][1:targets_length[b] + 1]
                ])
                print('[%d / %d ] %s - pred : %s' %
                      (n, totals, utt_id[b], pred))
                print('[%d / %d ] %s - truth: %s' % (n, totals, utt_id[b], truth))

                # Same trailing "(label-utt_id)" on hypothesis and reference.
                trailer = " (" + _utt_group_tag(utt_id[b]) + "-" + utt_id[b] + ")"
                writer.write(pred + trailer + '\n')
                writerRef.write(truth + trailer + '\n')