Esempio n. 1
0
  def get_next_split_data (self):
    '''
    Load the features and labels of the current data split.

    The scp file <tmp_dir>/split.<name>.<split_data_counter>.scp is piped
    through `splice-feats` (self.splice frames of left and right context)
    and then `apply-cmvn --norm-vars=true` using <exp>/cmvn.mat. Utterances
    whose id is missing from self.asr_labels or self.sid_labels are skipped.

    output:
      feat_list: list of np matrix [num_frames, feat_dim]
      asr_label_list: list of int32 np array [num_frames]
      sid_label_list: list of int32

    raises:
      RuntimeError: if no utterance having both kinds of label was read.
    '''
    scp_path = (self.tmp_dir + '/split.' + self.name + '.' +
                str(self.split_data_counter) + '.scp')
    p1 = Popen (['splice-feats', '--print-args=false', '--left-context='+str(self.splice),
                 '--right-context='+str(self.splice),
                 'scp:'+scp_path,
                 'ark:-'], stdout=PIPE, stderr=DEVNULL)
    p2 = Popen (['apply-cmvn', '--print-args=false', '--norm-vars=true', self.exp+'/cmvn.mat',
                 'ark:-', 'ark:-'], stdin=p1.stdout, stdout=PIPE, stderr=DEVNULL)
    # Drop the parent's copy of the pipe so that p1 receives SIGPIPE (instead
    # of blocking forever) if p2 exits before consuming all of its input.
    p1.stdout.close()

    feat_list = []
    asr_label_list = []
    sid_label_list = []

    try:
      while True:
        uid, feat = kaldi_IO.read_utterance (p2.stdout)
        if uid is None:
          # end of the archive
          break
        if uid in self.asr_labels and uid in self.sid_labels:
          feat_list.append (feat)
          asr_label_list.append (self.asr_labels[uid])
          sid_label_list.append (self.sid_labels[uid])
    finally:
      # Always release the pipe and reap both children; otherwise every call
      # leaves two zombie processes behind.
      p2.stdout.close()
      p2.wait()
      p1.wait()

    # The three lists are appended to together, so checking one is enough.
    if not feat_list:
      raise RuntimeError("No feats are loaded! please check feature and labels, and make sure they are matched.")

    return (feat_list, asr_label_list, sid_label_list)
Esempio n. 2
0
    def get_next_split_data(self):
        '''
        Load the features and labels of the current data split.

        The scp file <temp_dir>/split.<name>.<split_data_counter>.scp is piped
        through `splice-feats` (self.splice frames of left and right context)
        and then `apply-cmvn --norm-vars=true` using <exp>/cmvn.mat.
        Utterances whose id is not in self.labels are skipped.

        output:
          feat_list: list of np matrix [num_frames, feat_dim]
          label_list: list of int32 np array [num_frames]

        Both lists are empty when no utterance matched; no error is raised.
        '''
        scp_path = (self.temp_dir + '/split.' + self.name + '.' +
                    str(self.split_data_counter) + '.scp')
        p1 = Popen(
            [
                'splice-feats', '--print-args=false',
                '--left-context=' + str(self.splice),
                '--right-context=' + str(self.splice),
                'scp:' + scp_path, 'ark:-'
            ],
            stdout=PIPE,
            stderr=DEVNULL)
        p2 = Popen(
            [
                'apply-cmvn', '--print-args=false', '--norm-vars=true',
                self.exp + '/cmvn.mat', 'ark:-', 'ark:-'
            ],
            stdin=p1.stdout,
            stdout=PIPE,
            stderr=DEVNULL)
        # Drop the parent's copy of the pipe so that p1 receives SIGPIPE
        # (instead of blocking forever) if p2 exits early.
        p1.stdout.close()

        feat_list = []
        label_list = []
        try:
            while True:
                uid, feat = kaldi_IO.read_utterance(p2.stdout)
                if uid is None:
                    # no more utterances
                    break
                if uid in self.labels:
                    feat_list.append(feat)
                    label_list.append(self.labels[uid])
        finally:
            # Previously the function returned from inside the loop, so the
            # cleanup after it never ran: the pipes stayed open and the
            # children were never reaped.
            p2.stdout.close()
            p2.wait()
            p1.wait()

        return (feat_list, label_list)
Esempio n. 3
0
    def get_next_split_data(self):
        '''
        Load the features and labels of the current data split.

        Runs the shell pipeline
          copy-feats scp:<split scp> ark:- | splice-feats ... | apply-cmvn ...
        and reads the resulting archive from its stdout. Utterances whose id
        is not in self.labels are skipped.

        output:
          feat_list: list of np matrix [num_frames, feat_dim]
          label_list: list of int32 np array [num_frames]

        raises:
          RuntimeError: if no labelled utterance was read.
        '''
        # NOTE: the command is joined into one string and run through the
        # shell, so tmp_dir / name / exp are interpolated unquoted and must
        # not contain whitespace or shell metacharacters.
        cmd = [ 'copy-feats', 'scp:' + self.tmp_dir + '/split.' + self.name + '.' + \
               str(self.split_data_counter) + '.scp', 'ark:- |' ]
        cmd.extend([
            'splice-feats', '--left-context=' + str(self.splice),
            '--right-context=' + str(self.splice), 'ark:-', 'ark:-|'
        ])
        cmd.extend([
            'apply-cmvn', '--norm-vars=true', self.exp + '/cmvn.mat', 'ark:-',
            'ark:-'
        ])
        p1 = Popen(' '.join(cmd), shell=True, stdout=PIPE, stderr=DEVNULL)

        feat_list = []
        label_list = []

        try:
            while True:
                uid, feat = kaldi_IO.read_utterance(p1.stdout)
                if uid is None:
                    # end of the archive
                    break
                if uid in self.labels:
                    feat_list.append(feat)
                    label_list.append(self.labels[uid])
        finally:
            # Close the pipe and reap the shell even if reading fails;
            # without wait() every call leaves a zombie process behind.
            p1.stdout.close()
            p1.wait()

        # Both lists are appended to together, so checking one is enough.
        if not feat_list:
            raise RuntimeError("No feats are loaded! please check feature and labels," + \
                               "and make sure they are matched.")

        return (feat_list, label_list)
Esempio n. 4
0
# Read Kaldi features from stdin, run them through the network and write
# prior-normalised scores to stdout as a Kaldi archive.
logger.info("loading the model %s", args.model_file)
# args.model_file is a text file whose content is passed to nnet.read().
with open(args.model_file, 'r') as model_file:
  model_name = model_file.read()
nnet.read(model_name, num_multi = num_multi)

# Turn the per-class counts into log priors.
prior_counts = np.genfromtxt (args.prior_counts_file)
priors = prior_counts / prior_counts.sum()
log_priors = np.log(priors)

# Archives are binary, so use the raw byte streams underneath stdin/stdout.
ark_in = sys.stdin.buffer
ark_out = sys.stdout.buffer
encoding = sys.stdout.encoding
# Restore default SIGPIPE handling so the script dies quietly when the
# consumer of stdout goes away.
signal (SIGPIPE, SIG_DFL)

p1 = Popen(['splice-feats', '--print-args=false', '--left-context='+str(splice), 
            '--right-context='+str(splice), 'ark:-', 'ark:-'], stdin=ark_in, stdout=PIPE, stderr=DEVNULL)
p2 = Popen (['apply-cmvn', '--print-args=false', '--norm-vars=true', srcdir+'/cmvn.mat', 
             'ark:-', 'ark:-'], stdin=p1.stdout, stdout=PIPE, stderr=DEVNULL)
# Drop the parent's copy of the pipe so that p1 receives SIGPIPE (instead of
# blocking forever) if p2 exits early.
p1.stdout.close()

while True:
  uid, feats = kaldi_IO.read_utterance(p2.stdout)
  if uid is None:
    # we are done
    break

  # NOTE(review): predict() is called with take_log = False, yet its result
  # is treated as a log posterior below - confirm what predict() returns.
  log_post = nnet.predict (feats, take_log = False)
  log_likes = log_post - log_priors
  kaldi_IO.write_utterance(uid, log_likes, ark_out, encoding)

# Release the pipe and reap both children. The original ended with
# `p1.stdout.close` without parentheses, which did nothing.
p2.stdout.close()
p2.wait()
p1.wait()