def evaluate(self, data_stream, train=False, file_pred=None, file_targets=None): loss = 0. num_examples = 0 iterator = data_stream.get_epoch_iterator() if train: print 'Train evaluation started' i = 0 for inputs in iterator: inputs = dict(zip(data_stream.sources, inputs)) x_mask_val = inputs['features_mask'] x_val = inputs['features'] y_val = inputs['phonemes'] drops_forw_states_val = inputs['drops_forw_states'] drops_forw_cells_val = inputs['drops_forw_cells'] drops_forw_igates_val = inputs['drops_forw_igates'] drops_back_states_val = inputs['drops_back_states'] drops_back_cells_val = inputs['drops_back_cells'] drops_back_igates_val = inputs['drops_back_igates'] y_mask_val = inputs['phonemes_mask'] y_hat = self.prediction_func( x_val, x_mask_val, drops_forw_states_val, drops_forw_cells_val, drops_forw_igates_val, drops_back_states_val, drops_back_cells_val, drops_back_igates_val) y_predict = numpy.argmax(y_hat, axis=2) for batch in xrange(inputs['features'].shape[1]): y_val_cur = y_val[:sum(y_mask_val[:, batch]), batch] predicted = y_predict[:sum(x_mask_val[:, batch]), batch] predicted = ctc_strip(predicted) predictions = [ self.phoneme_dict[phone_ind] for phone_ind in predicted if self.phoneme_dict[phone_ind] not in self.black_list ] targets = [ self.phoneme_dict[phone_ind] for phone_ind in y_val_cur if self.phoneme_dict[phone_ind] not in self.black_list ] predictions = [x[0] for x in groupby(predictions)] targets = [x[0] for x in groupby(targets)] i += 1 if file_pred: file_pred.write(' '.join(predictions) + '(%d)\n' % i) if file_targets: file_targets.write(' '.join(targets) + '(%d)\n' % i) loss += Evaluation.wer([predictions], [targets]) num_examples += 1 print '.. found sequence example:', ' '.join(predictions) print '.. real output was: ', ' '.join(targets) if train: break if train: print 'Train evaluation finished' per = loss.sum() / num_examples return {'per': per}
def evaluate(self, data_stream, train=False, file_pred=None, file_targets=None): loss = 0. num_examples = 0 iterator = data_stream.get_epoch_iterator() if train: print 'Train evaluation started' i = 0 for inputs in iterator: inputs = dict(zip(data_stream.sources, inputs)) x_mask_val = inputs['features_mask'] x_val = inputs['features'] y_val = inputs['phonemes'] y_mask_val = inputs['phonemes_mask'] for batch_ind in xrange(inputs['features'].shape[1]): if x_val.ndim == 2: input_beam = numpy.tile(x_val[:, batch_ind][:, None], (1, self.beam_size)) else: input_beam = numpy.tile(x_val[:, batch_ind, :][:, None, :], (1, self.beam_size, 1)) input_mask_beam = numpy.tile(x_mask_val[:, batch_ind][:, None], (1, self.beam_size)) predictions, _ = self.beam_search.search( {self.x: input_beam, self.x_mask: input_mask_beam}, self.eol_symbol, 100) predictions = [self.phoneme_dict[phone_ind] for phone_ind in predictions[0] if self.phoneme_dict[phone_ind] not in self.black_list][1:-1] targets = y_val[:sum(y_mask_val[:, batch_ind]), batch_ind] targets = [self.phoneme_dict[phone_ind] for phone_ind in targets if self.phoneme_dict[phone_ind] not in self.black_list][1:-1] predictions = [x[0] for x in groupby(predictions)] targets = [x[0] for x in groupby(targets)] i += 1 if file_pred: file_pred.write(' '.join(predictions) + '(%d)\n' % i) if file_targets: file_targets.write(' '.join(targets) + '(%d)\n' %i) loss += Evaluation.wer([predictions], [targets]) num_examples += 1 print '.. found sequence example:', ' '.join(predictions) print '.. real output was: ', ' '.join(targets) if train: break if train: print 'Train evaluation finished' per = loss.sum() / num_examples return {'per': per}
def evaluate(self, data_stream, train=False, file_pred=None, file_targets=None): loss = 0. num_examples = 0 iterator = data_stream.get_epoch_iterator() if train: print 'Train evaluation started' i = 0 for inputs in iterator: inputs = dict(zip(data_stream.sources, inputs)) x_mask_val = inputs['features_mask'] x_val = inputs['features'] #transpose y_val = inputs['phonemes'] y_mask_val = inputs['phonemes_mask'] y_hat = self.prediction_func(x_val, x_mask_val) y_predict = numpy.argmax(y_hat, axis=2) for batch in xrange(inputs['features'].shape[0]): y_val_cur = y_val[:sum(y_mask_val[:, batch]), batch] predicted = y_predict[:sum(x_mask_val[:, batch]), batch] predicted = ctc_strip(predicted) predictions = [self.phoneme_dict[phone_ind] for phone_ind in predicted if self.phoneme_dict[phone_ind] not in self.black_list] targets = [self.phoneme_dict[phone_ind] for phone_ind in y_val_cur if self.phoneme_dict[phone_ind] not in self.black_list] predictions = [x[0] for x in groupby(predictions)] targets = [x[0] for x in groupby(targets)] i += 1 if file_pred: file_pred.write(' '.join(predictions) + '(%d)\n' % i) if file_targets: file_targets.write(' '.join(targets) + '(%d)\n' %i) loss += Evaluation.wer([predictions], [targets]) num_examples += 1 print '.. found sequence example:', ' '.join(predictions) print '.. real output was: ', ' '.join(targets) if train: break if train: print 'Train evaluation finished' per = loss.sum() / num_examples return {'per': per}
def evaluate(self, data_stream, train=False, file_pred=None, file_targets=None): loss = 0. num_examples = 0 iterator = data_stream.get_epoch_iterator() if train: print 'Train evaluation started' i = 0 for inputs in iterator: inputs = dict(zip(data_stream.sources, inputs)) x_mask_val = inputs['features_mask'] x_val = inputs['features'] y_val = inputs['triphones'] y_mask_val = inputs['triphones_mask'] y_hat = self.prediction_func(x_val, x_mask_val) y_predict = numpy.argmax(y_hat, axis=2) for batch in xrange(inputs['features'].shape[1]): y_val_cur = y_val[:sum(y_mask_val[:, batch]), batch] predicted = y_predict[:sum(x_mask_val[:, batch]), batch] predictions = [str(trihone) for triphone in predicted] targets = [str(triphone) for triphone in y_val_cur] predictions = [x[0] for x in groupby(predictions)] targets = [x[0] for x in groupby(targets)] i += 1 if file_pred: file_pred.write(' '.join(predictions) + '(%d)\n' % i) if file_targets: file_targets.write(' '.join(targets) + '(%d)\n' % i) loss += (sum(numpy.not_equal(y_val_cur, predicted))).astype( 'float32') /\ len(predicted) num_examples += 1 #print '.. found sequence example:', ' '.join(predictions) #print '.. real output was: ', ' '.join(targets) if train: break if train: print 'Train evaluation finished' fer = loss / num_examples return {'fer': fer}
def evaluate(self, data_stream, train=False, file_pred=None, file_targets=None): loss = 0. num_examples = 0 iterator = data_stream.get_epoch_iterator() if train: print 'Train evaluation started' i = 0 for inputs in iterator: inputs = dict(zip(data_stream.sources, inputs)) x_mask_val = inputs['features_mask'] x_val = inputs['features'] y_val = inputs['triphones'] y_mask_val = inputs['triphones_mask'] y_hat = self.prediction_func(x_val, x_mask_val) y_predict = numpy.argmax(y_hat, axis=2) for batch in xrange(inputs['features'].shape[1]): y_val_cur = y_val[:sum(y_mask_val[:, batch]), batch] predicted = y_predict[:sum(x_mask_val[:, batch]), batch] predictions = [str(trihone) for triphone in predicted] targets = [str(triphone) for triphone in y_val_cur] predictions = [x[0] for x in groupby(predictions)] targets = [x[0] for x in groupby(targets)] i += 1 if file_pred: file_pred.write(' '.join(predictions) + '(%d)\n' % i) if file_targets: file_targets.write(' '.join(targets) + '(%d)\n' %i) loss += (sum(numpy.not_equal(y_val_cur, predicted))).astype( 'float32') /\ len(predicted) num_examples += 1 #print '.. found sequence example:', ' '.join(predictions) #print '.. real output was: ', ' '.join(targets) if train: break if train: print 'Train evaluation finished' fer = loss / num_examples return {'fer': fer}
def verify_groupby(*args, **kwargs):
    """Check our ``groupby`` against ``itertools.groupby`` on the same input.

    Keyword-only options (popped before forwarding to the groupbys):
    ``n``/``m`` — per-group iteration counts handed to ``verify_pickle``
    (both must be given together); ``pickle_outer`` — index of the outer
    iteration at which the outer iterator itself is round-tripped
    through ``cPickle``.
    """
    if 'n' in kwargs:
        if 'm' not in kwargs:
            raise ValueError('got n without m')
        pickle = True
        n = kwargs.pop('n')
        m = kwargs.pop('m')
    elif 'm' in kwargs:
        raise ValueError('got m without n')
    else:
        pickle = False
        n = m = None
    # pop-with-default replaces the original membership test.
    pickle_outer = kwargs.pop('pickle_outer', None)

    reference = itertools.groupby(*args, **kwargs)
    actual = groupby(*args, **kwargs)

    outer_iters = 0
    while True:
        if outer_iters == pickle_outer:
            # Round-trip the iterator under test through pickle mid-stream.
            actual = cPickle.loads(cPickle.dumps(actual))
        try:
            ref_key, ref_grouper = next(reference)
        except StopIteration:
            # Reference exhausted: the tested iterator must stop too.
            check_stops(actual)
            break
        try:
            actual_key, actual_grouper = next(actual)
        except StopIteration:
            assert False, "prematurely exhausted; expected {}".format(ref_key)
        if pickle:
            this_n, n = n[0], n[1:]
            this_m, m = m[0], m[1:]
            verify_pickle(partial(_identity, actual_grouper),
                          partial(_identity, ref_grouper),
                          this_n, this_m)
        else:
            verify_same(partial(_identity, actual_grouper),
                        partial(_identity, ref_grouper),
                        None)
        outer_iters += 1
def network_evaluation(predict_fn, data_stream, phoneme_dict, black_list):
    """Evaluate a CTC network over *data_stream*.

    Runs *predict_fn* on every batch, accumulating the CTC cost per
    batch and the phoneme error rate per utterance.

    Returns
    -------
    (total_ctc, total_per) : mean CTC cost per batch and mean PER per
        utterance.
    """
    total_ctc = 0.
    total_per = 0.
    total_sample = 0.
    total_batch = 0.

    for batch_idx, data in enumerate(data_stream.get_epoch_iterator()):
        # Stream layout: (input, input_mask, target, target_mask).
        input_data = data[0]
        input_mask = data[1]
        target_data = data[2]
        target_mask = data[3]

        output = predict_fn(input_data, input_mask, target_data, target_mask)
        predict_idx = output[0]
        predict_ctc_cost = output[1]
        pred_cost_per_char = output[2]  # fetched but unused, kept for parity

        total_ctc += predict_ctc_cost
        total_batch += 1.

        # assumes batch-major (batch, time) arrays here — TODO confirm
        for j in range(input_data.shape[0]):
            reference = target_data[j, :numpy.sum(target_mask[j])]
            hypothesis = ctc_strip(predict_idx[j, :numpy.sum(input_mask[j])])

            ref_phonemes = [phoneme_dict[ind] for ind in reference
                            if phoneme_dict[ind] not in black_list]
            hyp_phonemes = [phoneme_dict[ind] for ind in hypothesis
                            if phoneme_dict[ind] not in black_list]

            # Collapse consecutive duplicates before scoring.
            targets = [key for key, _ in groupby(ref_phonemes)]
            predictions = [key for key, _ in groupby(hyp_phonemes)]

            total_per += Evaluation.wer([predictions], [targets])
            total_sample += 1

    total_ctc = total_ctc / total_batch
    total_per = total_per.sum() / total_sample
    return total_ctc, total_per
def evaluate(self, data_stream, train=False, file_pred=None, file_targets=None):
    """Beam-search decoding over *data_stream*; returns {'per': ...}.

    Each utterance is tiled ``self.beam_size`` times along the batch
    axis and decoded with ``self.beam_search`` (up to 100 steps); the
    ``[1:-1]`` slices drop the begin/end-of-sequence markers before
    scoring.  With *train* set, only the first batch is evaluated.
    """
    loss = 0.
    num_examples = 0
    iterator = data_stream.get_epoch_iterator()
    if train:
        print 'Train evaluation started'
    i = 0
    for inputs in iterator:
        inputs = dict(zip(data_stream.sources, inputs))
        x_mask_val = inputs['features_mask']
        x_val = inputs['features']
        y_val = inputs['phonemes']
        y_mask_val = inputs['phonemes_mask']
        # assumes (time, batch, ...) layout: batch on axis 1 — TODO confirm
        for batch_ind in xrange(inputs['features'].shape[1]):
            # Replicate the single utterance across the beam dimension.
            if x_val.ndim == 2:
                input_beam = numpy.tile(x_val[:, batch_ind][:, None],
                                        (1, self.beam_size))
            else:
                input_beam = numpy.tile(x_val[:, batch_ind, :][:, None, :],
                                        (1, self.beam_size, 1))
            input_mask_beam = numpy.tile(x_mask_val[:, batch_ind][:, None],
                                         (1, self.beam_size))
            predictions, _ = self.beam_search.search(
                {
                    self.x: input_beam,
                    self.x_mask: input_mask_beam
                },
                self.eol_symbol, 100)
            # Map indices to phoneme labels, dropping black-listed ones
            # and the sequence-boundary markers.
            predictions = [
                self.phoneme_dict[phone_ind] for phone_ind in predictions[0]
                if self.phoneme_dict[phone_ind] not in self.black_list
            ][1:-1]
            targets = y_val[:sum(y_mask_val[:, batch_ind]), batch_ind]
            targets = [
                self.phoneme_dict[phone_ind] for phone_ind in targets
                if self.phoneme_dict[phone_ind] not in self.black_list
            ][1:-1]
            # Collapse runs of repeated phonemes before scoring.
            predictions = [x[0] for x in groupby(predictions)]
            targets = [x[0] for x in groupby(targets)]
            i += 1
            if file_pred:
                file_pred.write(' '.join(predictions) + '(%d)\n' % i)
            if file_targets:
                file_targets.write(' '.join(targets) + '(%d)\n' % i)
            loss += Evaluation.wer([predictions], [targets])
            num_examples += 1
        print '.. found sequence example:', ' '.join(predictions)
        print '.. real output was: ', ' '.join(targets)
        if train:
            break
    if train:
        print 'Train evaluation finished'
    per = loss.sum() / num_examples
    return {'per': per}