def ctc_lambda_func(args):
    """Compute the batched CTC cost from a packed argument sequence.

    Args:
        args: Four-item sequence ordered as
            ``(predictions, labels, input_length, label_length)``.

    Returns:
        The value returned by ``bknd.ctc_batch_cost`` for the trimmed
        predictions.
    """
    predictions, targets, pred_lengths, target_lengths = args
    # Original author's note: the first couple of RNN outputs tend to be
    # garbage, so the first two entries along axis 1 are dropped. A trailing
    # note in the original reads "no such influence".
    # NOTE(review): pred_lengths is forwarded unchanged after the trim --
    # presumably the caller already accounts for the two dropped steps;
    # confirm.
    trimmed = predictions[:, 2:, :]
    return bknd.ctc_batch_cost(targets, trimmed, pred_lengths, target_lengths)
def _loss_function(self, inputs):
    """Return the batched CTC cost for one packed set of tensors.

    Args:
        inputs: Four-item sequence ordered as
            ``(labels, predictions, input_length, label_length)``.

    Returns:
        The value returned by ``K.ctc_batch_cost`` after the first two
        entries along axis 1 of the predictions have been dropped.
    """
    targets, predictions, pred_len, target_len = inputs
    # NOTE(review): pred_len is passed through unchanged after the trim --
    # presumably the caller supplies lengths that already exclude the two
    # dropped steps; confirm.
    trimmed = predictions[:, 2:, :]
    return K.ctc_batch_cost(
        y_true=targets,
        y_pred=trimmed,
        input_length=pred_len,
        label_length=target_len,
    )
def ctc_lambda_function(args):
    """Forward a packed four-item argument sequence to ``ctc_batch_cost``.

    Args:
        args: Sequence ordered as
            ``(y_true, y_pred, input_length, label_length)``.

    Returns:
        The value returned by ``ctc_batch_cost``.
    """
    targets, predictions, pred_len, target_len = args
    cost = ctc_batch_cost(targets, predictions, pred_len, target_len)
    return cost
def ctc_lambda_func(self, args):
    """Compute the batched CTC cost from a packed argument sequence.

    Args:
        args: Four-item sequence ordered as
            ``(y_pred, labels, input_length, label_length)``.

    Returns:
        The value returned by ``K.ctc_batch_cost``.
    """
    predictions, targets, pred_len, target_len = args
    # Full slice over three axes: every element is kept. The original also
    # carried a disabled variant that skipped the first two entries of
    # axis 1 (``[:, 2:, :]``).
    predictions = predictions[:, :, :]
    return K.ctc_batch_cost(
        y_true=targets,
        y_pred=predictions,
        input_length=pred_len,
        label_length=target_len,
    )
def ctc_lambda_func(self, args):
    """Compute the batched CTC cost via ``BK.ctc_batch_cost``.

    Args:
        args: Four-item sequence ordered as
            ``(predictions, labels, input_length, label_length)``.

    Returns:
        The value returned by ``BK.ctc_batch_cost``.
    """
    predictions, targets, pred_len, target_len = args
    # Full slice over three axes; no elements are dropped.
    kept = predictions[:, :, :]
    return BK.ctc_batch_cost(targets, kept, pred_len, target_len)
def ctc_lambda_function(args):
    """Unpack ``args`` and return the batched CTC cost.

    Args:
        args: Four-item sequence ordered as
            ``(y_true, logit, logit_length, y_true_length)``.

    Returns:
        The value returned by ``K.ctc_batch_cost``.
    """
    targets, outputs, output_lengths, target_lengths = args
    return K.ctc_batch_cost(
        y_true=targets,
        y_pred=outputs,
        input_length=output_lengths,
        label_length=target_lengths,
    )
def ctc_loss_layer(args):
    """Return the batched CTC cost for a packed argument sequence.

    Args:
        args: Four-item sequence ordered as
            ``(y_true, y_pred, pred_length, label_length)``.

    Returns:
        The value returned by ``K.ctc_batch_cost``.
    """
    targets, predictions, pred_len, target_len = args
    # Original author's note: loss function, maximum likelihood estimation.
    return K.ctc_batch_cost(
        y_true=targets,
        y_pred=predictions,
        input_length=pred_len,
        label_length=target_len,
    )
def _ctc_loss(args):
    """Unpack ``args`` and delegate to ``K.ctc_batch_cost``.

    Args:
        args: Four-item sequence ordered as
            ``(labels, y_pred, input_length, label_length)``.

    Returns:
        The value returned by ``K.ctc_batch_cost``.
    """
    targets, predictions, pred_len, target_len = args
    cost = K.ctc_batch_cost(
        y_true=targets,
        y_pred=predictions,
        input_length=pred_len,
        label_length=target_len,
    )
    return cost
def ctc_loss(args):
    """Compute the batched CTC cost after trimming the predictions.

    Args:
        args: Four-item sequence ordered as
            ``(y_pred, y_true, input_length, label_length)``.

    Returns:
        The value returned by ``K.ctc_batch_cost``.
    """
    predictions, targets, pred_len, target_len = args
    # Drop the first two entries along axis 1 before scoring.
    # NOTE(review): pred_len is forwarded unchanged -- presumably the caller
    # already accounts for the two dropped steps; confirm.
    trimmed = predictions[:, 2:, :]
    return K.ctc_batch_cost(
        y_true=targets,
        y_pred=trimmed,
        input_length=pred_len,
        label_length=target_len,
    )
def focal_ctc_lambda_func(args):
    """Return a focal-weighted CTC cost.

    The result is ``alpha * (1 - exp(-ctc)) ** gamma * ctc`` where ``ctc``
    is the value returned by ``K.ctc_batch_cost``. ``alpha`` and ``gamma``
    are not parameters; they are read from the surrounding scope.

    Args:
        args: Four-item sequence ordered as
            ``(labels, y_pred, input_length, label_length)``.

    Returns:
        The weighted cost tensor.
    """
    targets, predictions, pred_len, target_len = args
    base_cost = K.ctc_batch_cost(targets, predictions, pred_len, target_len)
    # exp(-cost) is used as the probability term of the focal weighting.
    likelihood = tf.exp(-base_cost)
    modulator = tf.pow((1 - likelihood), gamma)
    return alpha * modulator * base_cost
def ctcLambdaFunc(args):
    """Compute the batched CTC cost after trimming the predictions.

    Args:
        args: Four-item sequence ordered as
            ``(predictions, labels, input_length, label_length)``.

    Returns:
        The value returned by ``K.ctc_batch_cost``.
    """
    predictions, targets, pred_len, target_len = args
    # Skip the first two entries along axis 1.
    # NOTE(review): pred_len is forwarded unchanged -- presumably the caller
    # already accounts for the two dropped steps; confirm.
    return K.ctc_batch_cost(
        y_true=targets,
        y_pred=predictions[:, 2:, :],
        input_length=pred_len,
        label_length=target_len,
    )
def ctc_lambda_func(args):
    """Unpack ``args`` and return the batched CTC cost.

    Args:
        args: Four-item sequence ordered as
            ``(y_pred, labels, input_length, label_length)``.

    Returns:
        The value returned by ``K.ctc_batch_cost``.
    """
    predictions, targets, pred_len, target_len = args
    # NOTE(review): `tf` is imported here but never referenced in this
    # function; the import is kept in case it is relied on for its side
    # effects -- confirm before removing.
    import tensorflow as tf
    return K.ctc_batch_cost(
        y_true=targets,
        y_pred=predictions,
        input_length=pred_len,
        label_length=target_len,
    )
def ctc_lambda_func(self, args):
    """Compute the batched CTC cost after trimming the predictions.

    Args:
        args: Four-item sequence ordered as
            ``(y_pred, labels, input_length, label_length)``.

    Returns:
        The value returned by ``K.ctc_batch_cost``.
    """
    predictions, targets, pred_len, target_len = args
    # Drop the first two entries along axis 1 before scoring.
    # NOTE(review): pred_len is forwarded unchanged -- presumably the caller
    # already accounts for the two dropped steps; confirm.
    trimmed = predictions[:, 2:, :]
    return K.ctc_batch_cost(
        y_true=targets,
        y_pred=trimmed,
        input_length=pred_len,
        label_length=target_len,
    )
# NOTE(review): this span begins inside a function whose header (and any
# enclosing loop) is outside the visible source. The statements of that tail
# are kept flat, in their original order; their real nesting must be taken
# from the full file.
img = cv2.resize(img, (150, 50), interpolation=cv2.INTER_CUBIC)
img = cv2.transpose(img,(50,150))
img =cv2.flip(img,1)
# cv2.namedWindow("the window")
# cv2.imshow("the window",img)
# cv2.waitKey()
img = (255 - img) / 256 # invert the colours
X.append([img])
Y.append(get_label(file))
# print(get_label(file))
# print(np.shape(X))
# print(np.shape(X))
# print(np.shape(X))
X = np.transpose(X, (0, 2, 3, 1))
X = np.array(X)
Y = np.array(Y)
return X,Y


# the actual loss calc occurs here despite it not being
# an internal Keras loss function
def ctc_lambda_func(args):
    """Unpack ``(y_pred, labels, input_length, label_length)`` and return
    the value of ``K.ctc_batch_cost``."""
    y_pred, labels, input_length, label_length = args
    # the 2 is critical here since the first couple outputs of the RNN
    # tend to be garbage:
    # y_pred = y_pred[:, 2:, :]  (author's note: testing suggested no effect)
    # The active slice below keeps every element.
    y_pred = y_pred[:, :, :]
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)


# Builds a conv + bidirectional-GRU network and wraps it in a second model
# whose only output is the CTC loss produced by the Lambda layer.
# NOTE(review): block nesting below is reconstructed from whitespace-mangled
# source -- confirm against the full file.
if __name__ == '__main__':
    height=150
    width=50
    input_tensor = Input((height, width, 1))
    x = input_tensor
    # Three conv/pool stages with 32, 64 and 128 filters.
    for i in range(3):
        x = Convolution2D(32*2**i, (3, 3), activation='relu', padding='same')(x)
        # x = Convolution2D(32*2**i, (3, 3), activation='relu')(x)
        x = MaxPooling2D(pool_size=(2, 2))(x)
    conv_shape = x.get_shape()
    # print(conv_shape)
    # Keep axis 1 as the sequence axis and merge axes 2 and 3 into features.
    x = Reshape(target_shape=(int(conv_shape[1]), int(conv_shape[2] * conv_shape[3])))(x)
    x = Dense(32, activation='relu')(x)
    # First recurrent pair: forward and backward GRU outputs are summed.
    gru_1 = GRU(32, return_sequences=True, kernel_initializer='he_normal', name='gru1')(x)
    gru_1b = GRU(32, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='gru1_b')(x)
    gru1_merged = add([gru_1, gru_1b])
    ###################
    # Second recurrent pair: forward and backward GRU outputs are concatenated.
    gru_2 = GRU(32, return_sequences=True, kernel_initializer='he_normal', name='gru2')(gru1_merged)
    gru_2b = GRU(32, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='gru2_b')( gru1_merged)
    x = concatenate([gru_2, gru_2b])
    ######################
    x = Dropout(0.25)(x)
    x = Dense(label_count, kernel_initializer='he_normal', activation='softmax')(x)
    # base_model maps the image input to the softmax output.
    base_model = Model(inputs=input_tensor, outputs=x)
    labels = Input(name='the_labels', shape=[seq_len], dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')
    loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([x, labels, input_length, label_length])
    model = Model(inputs=[input_tensor, labels, input_length, label_length], outputs=[loss_out])
    # The 'ctc' output already is the loss, so the compiled loss just
    # returns y_pred.
    model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer='adadelta')
    model.summary()


def test(base_model):
    """Predict on the images under ``data\\test``, CTC-decode the output and
    print each expected/decoded string pair with a running error count.

    Args:
        base_model: Object whose ``predict(X)`` output is sliced on three
            axes and passed to ``K.ctc_decode``.
    """
    file_list = []
    X, Y = gen_image_data(r'data\test', file_list)
    y_pred = base_model.predict(X)
    shape = y_pred[:, :, :].shape # 2:
    # Every sample is decoded with input_length equal to the full axis-1 size.
    out = K.get_value(K.ctc_decode(y_pred[:, :, :], input_length=np.ones(shape[0]) * shape[1])[0][0])[:, :seq_len] # 2:
    print()
    error_count=0
    for i in range(len(X)):
        print(file_list[i])
        # Expected text: file name without extension, part after the last '_'.
        str_src = str(os.path.split(file_list[i])[-1]).split('.')[0].split('_')[-1]
        print(out[i])
        # Entries equal to -1 are skipped when building the decoded string.
        str_out = ''.join([str(x) for x in out[i] if x!=-1 ])
        print(str_src, str_out)
        if str_src!=str_out:
            error_count+=1
            # NOTE(review): nesting of this print (inside the mismatch
            # branch) is a reconstruction -- confirm against the full file.
            print('################################',error_count)
            # img = cv2.imread(file_list[i])
            # cv2.imshow('image', img)
            # cv2.waitKey()
import numpy as np
import keras.backend as K
import tensorflow as tf

# Small driver that evaluates K.ctc_batch_cost on hand-written labels and
# random predictions, then prints the evaluated result.

# Three label rows of ten entries each.
a, b, c = (
    [1, 2, 3, 1, 2, 4, 6, 6, 6, 6],
    [3, 1, 2, 3, 5, 1, 6, 6, 6, 6],
    [2, 1, 0, 2, 3, 4, 6, 6, 6, 6],
)
y_true = np.array([a, b, c])

# Random float32 predictions of shape (3, 15, 7).
y_pred = np.random.rand(3, 15, 7).astype(np.float32)

# One length per row, each stored as a single-element row.
input_length = np.array([[7], [8], [9]])
label_length = np.array([[4], [4], [4]])

result = K.ctc_batch_cost(y_true, y_pred, input_length, label_length)
print(K.eval(result))
def ctc_lambda_func(args):
    """Compute the batched CTC cost after trimming the predictions.

    Args:
        args: Four-item sequence ordered as
            ``(y_pred, labels, input_length, label_length)``.

    Returns:
        The value returned by ``K.ctc_batch_cost``.
    """
    predictions, targets, pred_len, target_len = args
    # Original author's note: the 2 is critical here since the first couple
    # of RNN outputs tend to be garbage.
    # NOTE(review): pred_len is forwarded unchanged -- presumably the caller
    # already accounts for the two dropped steps; confirm.
    trimmed = predictions[:, 2:, :]
    return K.ctc_batch_cost(
        y_true=targets,
        y_pred=trimmed,
        input_length=pred_len,
        label_length=target_len,
    )
def ctc_lambda_func(args):
    """Unpack ``args`` and return the batched CTC cost.

    See https://www.tensorflow.org/api_docs/python/tf/keras/backend/ctc_batch_cost

    Args:
        args: Four-item sequence ordered as
            ``(y_true, y_pred, input_length, label_length)``.

    Returns:
        The value returned by ``K.ctc_batch_cost``.
    """
    targets, predictions, pred_len, target_len = args
    return K.ctc_batch_cost(
        y_true=targets,
        y_pred=predictions,
        input_length=pred_len,
        label_length=target_len,
    )
def ctc_lambda_func(args):
    """Compute the batched CTC cost on predictions trimmed at both ends.

    Args:
        args: Four-item sequence ordered as
            ``(y_true, y_pred, input_length, label_length)``.

    Returns:
        The value returned by ``K.ctc_batch_cost``.
    """
    targets, predictions, pred_len, target_len = args
    # Drop the first and the last entry along axis 1.
    # NOTE(review): pred_len is forwarded unchanged -- presumably the caller
    # already accounts for the two dropped steps; confirm.
    inner = predictions[:, 1:-1, :]
    return K.ctc_batch_cost(
        y_true=targets,
        y_pred=inner,
        input_length=pred_len,
        label_length=target_len,
    )
def ctc_lambda_func(args):
    """Compute the batched CTC cost using a fixed prediction length.

    The prediction length is not supplied by the caller: it is taken from
    ``y_pred.shape[1]`` and repeated ``batchSize`` times, where
    ``batchSize`` is read from the surrounding scope. ``K.softmax`` is
    applied to the predictions before scoring.

    Args:
        args: Three-item sequence ordered as
            ``(y_pred, labels, label_lengths)``.

    Returns:
        The value returned by ``K.ctc_batch_cost``.
    """
    predictions, targets, target_lengths = args
    steps = predictions.shape[1]
    pred_lengths = [[steps]] * batchSize
    probabilities = K.softmax(predictions)
    return K.ctc_batch_cost(targets, probabilities, pred_lengths, target_lengths)
def ctc_func(self, args):
    """Unpack ``args`` and return the batched CTC cost.

    Args:
        args: Four-item sequence ordered as
            ``(y_pred, labels, input_length, label_length)``.

    Returns:
        The value returned by ``K.ctc_batch_cost``.
    """
    predictions, targets, pred_len, target_len = args
    # ctc_batch_cost takes the labels first, then the predictions.
    return K.ctc_batch_cost(targets, predictions, pred_len, target_len)
def compute_loss(args):
    """Compute the batched CTC cost after trimming the predictions.

    Args:
        args: Four-item sequence ordered as
            ``(y_pred, labels, input_length, label_length)``.

    Returns:
        The value returned by ``K.ctc_batch_cost``.
    """
    predictions, targets, pred_len, target_len = args
    # Skip the first two entries along axis 1.
    # NOTE(review): pred_len is forwarded unchanged -- presumably the caller
    # already accounts for the two dropped steps; confirm.
    return K.ctc_batch_cost(
        y_true=targets,
        y_pred=predictions[:, 2:, :],
        input_length=pred_len,
        label_length=target_len,
    )
def ctc_loss_function(args):
    """Unpack ``args`` and return the batched CTC cost.

    Args:
        args: Four-item sequence ordered as
            ``(y_pred, labels, input_length, label_length)``.

    Returns:
        The value returned by ``K.ctc_batch_cost``.
    """
    predictions, targets, pred_len, target_len = args
    # TODO: the original author left an open question here about adding a
    # "weird hack"; nothing is applied to the predictions at present.
    return K.ctc_batch_cost(
        y_true=targets,
        y_pred=predictions,
        input_length=pred_len,
        label_length=target_len,
    )
def ctc_lambda(args):
    """Run the batched CTC loss on a packed argument sequence.

    Original author's notes (translated): ``labels`` is the tensor holding
    the labels, ``y_pred`` the tensor holding the softmax output, and
    ``input_length`` the sequence length of each batch element in
    ``y_pred``.

    Args:
        args: Four-item sequence ordered as
            ``(labels, y_pred, input_length, label_length)``.

    Returns:
        The value returned by ``K.ctc_batch_cost``.
    """
    targets, predictions, pred_len, target_len = args
    # Full slice over three axes; every element is kept.
    predictions = predictions[:, :, :]
    # Run the CTC loss over the batch.
    return K.ctc_batch_cost(
        y_true=targets,
        y_pred=predictions,
        input_length=pred_len,
        label_length=target_len,
    )
def get_loss(self, model, target, output):
    """Build the CTC loss and the named placeholders it consumes.

    Only the ``'keras'`` backend is handled. ``self.variant`` selects the
    implementation: ``None`` uses ``K.ctc_batch_cost``; ``'warp'`` uses
    ``ctc.cpu_ctc_th`` and is accepted only when the backend toolchain is
    ``'theano'``.

    Args:
        model: Object providing ``get_backend()`` and ``add_data_source()``.
        target: Name of the model output; used in log messages and to name
            the flattened-labels data source.
        output: Model output passed to the CTC function.

    Returns:
        A 2-tuple ``(inputs, loss)``. ``inputs`` is a 3-tuple of
        ``(data_name, placeholder)`` pairs, in the order transcript length,
        transcript, utterance length.

    Raises:
        ValueError: If the backend is not ``'keras'``, the variant is
            neither ``None`` nor ``'warp'``, or ``'warp'`` is requested
            without the Theano toolchain.
    """
    backend = model.get_backend()

    # Guard clause: nothing but Keras is supported here.
    if backend.get_name() != 'keras':
        raise ValueError(
            'Unsupported backend "{}" for loss function "{}"'.format(
                backend.get_name(), self.get_name()))

    import keras.backend as K

    use_builtin = self.variant is None
    if use_builtin:
        logger.debug(
            'Attaching built-in Keras CTC loss function to '
            'model output "%s".', target)
    elif self.variant == 'warp':
        logger.info(
            'Attaching Warp-CTC loss function to model '
            'output "%s".', target)
        if backend.get_toolchain() != 'theano':
            logger.error('If you want to use warp-ctc, you need to '
                         'use the Theano backend to Keras.')
            raise ValueError('Warp-CTC is currently only supported '
                             'with the Theano backend to Keras.')
    else:
        raise ValueError(
            'Unsupported variant "{}" on loss function '
            '"{}" for backend "{}".'.format(
                self.variant, self.get_name(), backend.get_name()))

    # Names of the derived data sources that may be registered below.
    ctc_scaled = 'ctc_scaled_{}'.format(self.input_length)
    flattened_labels = 'ctc_flattened_labels_{}'.format(target)

    # The placeholder names double as the data names in the return value.
    is_relative = self.relative_to is not None
    transcript_name = self.output if use_builtin else flattened_labels
    utterance_name = ctc_scaled if is_relative else self.input_length

    transcript_length = K.placeholder(
        ndim=2, dtype='int32', name=self.output_length)
    transcript = K.placeholder(
        ndim=2, dtype='int32', name=transcript_name)
    utterance_length = K.placeholder(
        ndim=2, dtype='int32', name=utterance_name)

    if is_relative:
        model.add_data_source(
            ctc_scaled,
            ScaledSource(model,
                         relative_to=self.relative_to,
                         to_this=target,
                         scale_this=self.input_length))

    if use_builtin:
        out = K.ctc_batch_cost(transcript, output,
                               utterance_length, transcript_length)
    else:
        # The variant was validated above, so this is the 'warp' path.
        model.add_data_source(
            flattened_labels,
            FlattenSource(self.output, self.output_length))

        import ctc  # pylint: disable=import-error
        # The output axes are reordered to (1, 0, 2), both length
        # placeholders lose their last axis, and the first transcript row
        # is shifted by +1.
        out = ctc.cpu_ctc_th(output.dimshuffle((1, 0, 2)),
                             K.squeeze(utterance_length, -1),
                             transcript[0] + 1,
                             K.squeeze(transcript_length, -1))

    inputs = (
        (self.output_length, transcript_length),
        (transcript_name, transcript),
        (utterance_name, utterance_length),
    )
    return (inputs, out)
def get_loss(self, model, target, output):
    """Return the CTC loss and the data inputs it needs for the backend.

    ``self.variant`` is tested with ``in`` for the flags ``'warp'`` (use a
    Warp-CTC binding) and ``'loss_scale'`` (multiply the loss by the mean
    utterance length divided by 100).

    Args:
        model: Object providing ``get_backend()``, ``add_data_source()``
            and, for PyTorch, ``data.placeholder()`` / ``data.move()``.
        target: Name of the model output; used in log messages and to name
            the flattened-labels data source.
        output: Model output passed to the CTC function (Keras branch).

    Returns:
        Keras: a tuple ``(inputs, loss)`` where ``inputs`` holds
        ``(data_name, placeholder)`` pairs ordered transcript length,
        transcript, utterance length.
        PyTorch: a list ``[inputs, loss_fn]`` where ``inputs`` holds the
        pairs ordered transcript, utterance length, transcript length and
        ``loss_fn(inputs, output)`` computes the loss.

    Raises:
        ValueError: Unsupported backend, Warp-CTC requested on Keras
            without the Theano toolchain, or PyTorch without ``'warp'``.
        ImportError: The required Warp-CTC binding cannot be imported.
    """
    backend = model.get_backend()
    backend_name = backend.get_name()

    if backend_name == 'keras':

        import keras.backend as K

        use_warp = 'warp' in self.variant
        if use_warp:
            logger.info(
                'Attaching Warp-CTC loss function to model '
                'output "%s".', target)
            if backend.get_toolchain() != 'theano':
                logger.error('If you want to use warp-ctc, you need to '
                             'use the Theano backend to Keras.')
                raise ValueError('Warp-CTC is currently only supported '
                                 'with the Theano backend to Keras.')
        else:
            # Just use the built-in Keras CTC loss function.
            logger.debug(
                'Attaching built-in Keras CTC loss function to '
                'model output "%s".', target)

        # Names of the derived data sources that may be registered below.
        ctc_scaled = 'ctc_scaled_{}'.format(self.input_length)
        flattened_labels = 'ctc_flattened_labels_{}'.format(target)

        # The placeholder names double as the data names in the result.
        transcript_name = flattened_labels if use_warp else self.output
        utterance_name = self.input_length if self.relative_to is None \
            else ctc_scaled

        transcript_length = K.placeholder(
            ndim=2, dtype='int32', name=self.output_length)
        transcript = K.placeholder(
            ndim=2, dtype='int32', name=transcript_name)
        utterance_length = K.placeholder(
            ndim=2, dtype='int32', name=utterance_name)

        if self.relative_to is not None:
            model.add_data_source(
                ctc_scaled,
                ScaledSource(model,
                             relative_to=self.relative_to,
                             to_this=target,
                             scale_this=self.input_length))

        if use_warp:
            model.add_data_source(
                flattened_labels,
                FlattenSource(self.output, self.output_length))

            try:
                import ctc  # pylint: disable=import-error
            except ImportError:
                logger.error(
                    'The warp-CTC loss function was requested, '
                    'but we cannot find the "ctc" library. See our '
                    'troubleshooting page for helpful tips.')
                raise ImportError(
                    'Cannot find the "ctc" library, which '
                    'is needed when using the "warp" variant of the CTC '
                    'loss function.')

            # Output axes are reordered to (1, 0, 2), both length
            # placeholders lose their last axis, and the first transcript
            # row is shifted by +1.
            out = ctc.cpu_ctc_th(output.dimshuffle((1, 0, 2)),
                                 K.squeeze(utterance_length, -1),
                                 transcript[0] + 1,
                                 K.squeeze(transcript_length, -1))
        else:
            out = K.ctc_batch_cost(transcript, output,
                                   utterance_length, transcript_length)

        if 'loss_scale' in self.variant:
            logger.debug('Loss scaling is active.')
            out = out * K.mean(K.cast(utterance_length, K.dtype(out))) / 100

        return (
            (
                (self.output_length, transcript_length),
                (transcript_name, transcript),
                (utterance_name, utterance_length)
            ),
            out
        )

    elif backend_name == 'pytorch':

        if 'warp' not in self.variant:
            logger.error(
                'PyTorch does not include a native CTC loss '
                'function yet. However, PyTorch bindings to Warp CTC are '
                'available (SeanNaren/warp-ctc). Try installing that, and '
                'then setting variant=warp.')
            raise ValueError('Only Warp CTC is supported for PyTorch '
                             'right now.')

        # From here on 'warp' is known to be in self.variant, so the
        # flattened-labels source is always the transcript source.
        ctc_scaled = 'ctc_scaled_{}'.format(self.input_length)
        flattened_labels = 'ctc_flattened_labels_{}'.format(target)
        utterance_name = self.input_length if self.relative_to is None \
            else ctc_scaled

        transcript_length = model.data.placeholder(
            self.output_length, location='cpu', data_type='int')
        transcript = model.data.placeholder(
            flattened_labels, location='cpu', data_type='int')
        utterance_length = model.data.placeholder(
            utterance_name, location='cpu', data_type='int')

        if self.relative_to is not None:
            model.add_data_source(
                ctc_scaled,
                ScaledSource(model,
                             relative_to=self.relative_to,
                             to_this=target,
                             scale_this=self.input_length))

        model.add_data_source(
            flattened_labels,
            FlattenSource(self.output, self.output_length))

        try:
            from warpctc_pytorch import CTCLoss  # pylint: disable=import-error
        except ImportError:
            logger.error(
                'The warp-CTC loss function was requested, '
                'but we cannot find the "warpctc_pytorch" library. See '
                'our troubleshooting page for helpful tips.')
            raise ImportError(
                'Cannot find the "warpctc_pytorch" library, '
                'which is needed when using the "warp" variant of the CTC '
                'loss function.')

        loss = model.data.move(CTCLoss())

        def basic_ctc_loss(inputs, output):
            """ Computes the CTC loss divided by ``output.size(0)``.

                ``inputs`` follows the order of the list returned below:
                transcript, utterance length, transcript length.
            """
            return loss(
                output.transpose(1, 0).contiguous(),
                inputs[0][0] + 1,       # transcript[0]+1
                inputs[1].squeeze(1),   # K.squeeze(utterance_length, -1),
                inputs[2].squeeze(1)    # K.squeeze(transcript_length, -1)
            ) / output.size(0)

        if 'loss_scale' in self.variant:
            logger.debug('Loss scaling is active.')

            def loss_scale(inputs, output):
                """ Computes the CTC loss scaled by mean utterance
                    length / 100.
                """
                # NOTE(review): `.data[0]` indexes the result of `.mean()`;
                # newer PyTorch releases expect `.item()` for 0-dim tensors
                # -- confirm the supported PyTorch version.
                factor = inputs[1].float().mean().data[0] / 100.
                return basic_ctc_loss(inputs, output) * factor

            get_ctc_loss = loss_scale
        else:
            get_ctc_loss = basic_ctc_loss

        return [
            [
                (flattened_labels, transcript),
                (utterance_name, utterance_length),
                (self.output_length, transcript_length)
            ],
            get_ctc_loss
        ]

    else:
        raise ValueError(
            'Unsupported backend "{}" for loss function "{}"'.format(
                backend_name, self.get_name()))
def ctc_lambda_func(args):
    """Unpack ``args`` and return the batched CTC cost.

    Args:
        args: Four-item sequence ordered as
            ``(y_pred, labels, input_length, label_length)``.

    Returns:
        The value returned by ``K.ctc_batch_cost``.
    """
    predictions, targets, pred_len, target_len = args
    # ctc_batch_cost takes the labels first, then the predictions.
    cost = K.ctc_batch_cost(
        y_true=targets,
        y_pred=predictions,
        input_length=pred_len,
        label_length=target_len,
    )
    return cost
def call(self, y_pred):
    """Return the batched CTC cost of ``y_pred`` against stored targets.

    Reads ``self.labels``, ``self.input_length`` and ``self.label_length``.

    Args:
        y_pred: Predictions passed as the second argument of
            ``K.ctc_batch_cost``.

    Returns:
        The value returned by ``K.ctc_batch_cost``.
    """
    return K.ctc_batch_cost(
        y_true=self.labels,
        y_pred=y_pred,
        input_length=self.input_length,
        label_length=self.label_length,
    )
def ctc_lambda(args):
    """Unpack ``args`` and return the batched CTC cost.

    Args:
        args: Four-item sequence ordered as
            ``(labels, y_pred, input_length, label_length)``.

    Returns:
        The value returned by ``K.ctc_batch_cost``.
    """
    targets, predictions, pred_len, target_len = args
    # Full slice over three axes; every element is kept.
    kept = predictions[:, :, :]
    return K.ctc_batch_cost(
        y_true=targets,
        y_pred=kept,
        input_length=pred_len,
        label_length=target_len,
    )
def ctc_lambda_func(args):
    """Compute the batched CTC cost after trimming the predictions.

    Args:
        args: Four-item sequence ordered as
            ``(y_pred, labels, input_length, label_length)``.

    Returns:
        The value returned by ``K.ctc_batch_cost``.
    """
    predictions, targets, pred_len, target_len = args
    # Drop the first two entries along axis 1 before scoring.
    # NOTE(review): pred_len is forwarded unchanged -- presumably the caller
    # already accounts for the two dropped steps; confirm.
    trimmed = predictions[:, 2:, :]
    return K.ctc_batch_cost(
        y_true=targets,
        y_pred=trimmed,
        input_length=pred_len,
        label_length=target_len,
    )
def ctc_lambda_func(args):
    """Apply ``K.softmax`` to the predictions and return the CTC cost.

    Args:
        args: Four-item sequence ordered as
            ``(prediction, labels, prediction_lengths, label_lengths)``.

    Returns:
        The value returned by ``K.ctc_batch_cost``.
    """
    raw_scores, targets, pred_lengths, target_lengths = args
    # The original carried a disabled variant that skipped the first two
    # entries of axis 1 (``[:, 2:, :]``); the full tensor is used.
    probabilities = K.softmax(raw_scores)
    return K.ctc_batch_cost(
        y_true=targets,
        y_pred=probabilities,
        input_length=pred_lengths,
        label_length=target_lengths,
    )
def ctc_lambda_func(args):
    """Reorder the packed arguments and return the batched CTC cost.

    Args:
        args: Four-item sequence ordered as
            ``(y_pred, labels, input_length, label_length)``.

    Returns:
        The value returned by ``K.ctc_batch_cost``.
    """
    predictions, targets, pred_len, target_len = args
    # The incoming order puts predictions first, whereas ctc_batch_cost
    # takes the labels first; keyword arguments make the mapping explicit.
    return K.ctc_batch_cost(
        y_true=targets,
        y_pred=predictions,
        input_length=pred_len,
        label_length=target_len,
    )
def _ctc_lambda(args):
    """Unpack ``args`` and delegate to ``backend.ctc_batch_cost``.

    Args:
        args: Four-item sequence ordered as
            ``(predictions, labels, prediction_lengths, label_lengths)``.

    Returns:
        The value returned by ``backend.ctc_batch_cost``.
    """
    predictions, targets, pred_lengths, target_lengths = args
    # ctc_batch_cost takes the labels first, then the predictions.
    return backend.ctc_batch_cost(
        targets, predictions, pred_lengths, target_lengths)
def get_loss(self, model, target, output):
    """Return the CTC loss and the data inputs it needs for the backend.

    ``self.variant`` is tested with ``in`` for two flags: ``'warp'``
    selects a Warp-CTC binding, and ``'loss_scale'`` multiplies the loss by
    the mean utterance length divided by 100.

    Args:
        model: Object providing ``get_backend()``, ``add_data_source()``
            and, for PyTorch, ``data.placeholder()`` / ``data.move()``.
        target: Name of the model output; used in log messages and to name
            the flattened-labels data source.
        output: Model output passed to the CTC function (Keras branch).

    Returns:
        Keras: a tuple ``(inputs, loss)`` where ``inputs`` holds
        ``(data_name, placeholder)`` pairs ordered transcript length,
        transcript, utterance length.
        PyTorch: a list ``[inputs, loss_fn]`` where ``inputs`` holds the
        pairs ordered transcript, utterance length, transcript length and
        ``loss_fn(inputs, output)`` computes the loss.

    Raises:
        ValueError: Unsupported backend, Warp-CTC requested on Keras
            without the Theano toolchain, or PyTorch without ``'warp'``.
        ImportError: The required Warp-CTC binding cannot be imported.
    """
    backend = model.get_backend()
    backend_name = backend.get_name()

    if backend_name == 'keras':

        import keras.backend as K

        warp_requested = 'warp' in self.variant
        if warp_requested:
            logger.info('Attaching Warp-CTC loss function to model '
                'output "%s".', target)
            if backend.get_toolchain() != 'theano':
                logger.error('If you want to use warp-ctc, you need to '
                    'use the Theano backend to Keras.')
                raise ValueError('Warp-CTC is currently only supported '
                    'with the Theano backend to Keras.')
        else:
            # Just use the built-in Keras CTC loss function.
            logger.debug('Attaching built-in Keras CTC loss function to '
                'model output "%s".', target)

        # Names of the derived data sources that may be registered below.
        ctc_scaled = 'ctc_scaled_{}'.format(self.input_length)
        flattened_labels = 'ctc_flattened_labels_{}'.format(target)

        # The placeholder names double as the data names in the result.
        if warp_requested:
            transcript_source = flattened_labels
        else:
            transcript_source = self.output
        if self.relative_to is None:
            utterance_source = self.input_length
        else:
            utterance_source = ctc_scaled

        transcript_length = K.placeholder(
            ndim=2,
            dtype='int32',
            name=self.output_length
        )
        transcript = K.placeholder(
            ndim=2,
            dtype='int32',
            name=transcript_source
        )
        utterance_length = K.placeholder(
            ndim=2,
            dtype='int32',
            name=utterance_source
        )

        if self.relative_to is not None:
            model.add_data_source(
                ctc_scaled,
                ScaledSource(
                    model,
                    relative_to=self.relative_to,
                    to_this=target,
                    scale_this=self.input_length
                )
            )

        if warp_requested:
            model.add_data_source(
                flattened_labels,
                FlattenSource(
                    self.output,
                    self.output_length
                )
            )

            try:
                import ctc  # pylint: disable=import-error
            except ImportError:
                logger.error('The warp-CTC loss function was requested, '
                    'but we cannot find the "ctc" library. See our '
                    'troubleshooting page for helpful tips.')
                raise ImportError('Cannot find the "ctc" library, which '
                    'is needed when using the "warp" variant of the CTC '
                    'loss function.')

            # Output axes are reordered to (1, 0, 2), both length
            # placeholders lose their last axis, and the first transcript
            # row is shifted by +1.
            out = ctc.cpu_ctc_th(
                output.dimshuffle((1, 0, 2)),
                K.squeeze(utterance_length, -1),
                transcript[0]+1,
                K.squeeze(transcript_length, -1)
            )
        else:
            out = K.ctc_batch_cost(
                transcript,
                output,
                utterance_length,
                transcript_length
            )

        if 'loss_scale' in self.variant:
            logger.debug('Loss scaling is active.')
            out = out * K.mean(
                K.cast(utterance_length, K.dtype(out))
            ) / 100

        return (
            (
                (self.output_length, transcript_length),
                (transcript_source, transcript),
                (utterance_source, utterance_length)
            ),
            out
        )

    elif backend_name == 'pytorch':

        if 'warp' not in self.variant:
            logger.error('PyTorch does not include a native CTC loss '
                'function yet. However, PyTorch bindings to Warp CTC are '
                'available (SeanNaren/warp-ctc). Try installing that, and '
                'then setting variant=warp.')
            raise ValueError('Only Warp CTC is supported for PyTorch '
                'right now.')

        # The guard above guarantees 'warp' is in self.variant, so the
        # Warp-CTC setup below is unconditional.
        ctc_scaled = 'ctc_scaled_{}'.format(self.input_length)
        flattened_labels = 'ctc_flattened_labels_{}'.format(target)
        if self.relative_to is None:
            utterance_source = self.input_length
        else:
            utterance_source = ctc_scaled

        transcript_length = model.data.placeholder(
            self.output_length,
            location='cpu',
            data_type='int'
        )
        transcript = model.data.placeholder(
            flattened_labels,
            location='cpu',
            data_type='int'
        )
        utterance_length = model.data.placeholder(
            utterance_source,
            location='cpu',
            data_type='int'
        )

        if self.relative_to is not None:
            model.add_data_source(
                ctc_scaled,
                ScaledSource(
                    model,
                    relative_to=self.relative_to,
                    to_this=target,
                    scale_this=self.input_length
                )
            )

        model.add_data_source(
            flattened_labels,
            FlattenSource(
                self.output,
                self.output_length
            )
        )

        try:
            from warpctc_pytorch import CTCLoss  # pylint: disable=import-error
        except ImportError:
            logger.error('The warp-CTC loss function was requested, '
                'but we cannot find the "warpctc_pytorch" library. See '
                'our troubleshooting page for helpful tips.')
            raise ImportError('Cannot find the "warpctc_pytorch" library, '
                'which is needed when using the "warp" variant of the CTC '
                'loss function.')

        loss = model.data.move(CTCLoss())

        def basic_ctc_loss(inputs, output):
            """ Computes the CTC loss divided by ``output.size(0)``.

                ``inputs`` follows the order of the list returned below:
                transcript, utterance length, transcript length.
            """
            return loss(
                output.transpose(1, 0).contiguous(),
                inputs[0][0]+1,         # transcript[0]+1
                inputs[1].squeeze(1),   # K.squeeze(utterance_length, -1),
                inputs[2].squeeze(1)    # K.squeeze(transcript_length, -1)
            ) / output.size(0)

        if 'loss_scale' in self.variant:
            logger.debug('Loss scaling is active.')

            def loss_scale(inputs, output):
                """ Computes the CTC loss scaled by mean utterance
                    length / 100.
                """
                # NOTE(review): `.data[0]` indexes the result of `.mean()`;
                # newer PyTorch releases expect `.item()` for 0-dim tensors
                # -- confirm the supported PyTorch version.
                factor = inputs[1].float().mean().data[0] / 100.
                return basic_ctc_loss(inputs, output) * factor

            get_ctc_loss = loss_scale
        else:
            get_ctc_loss = basic_ctc_loss

        return [
            [
                (flattened_labels, transcript),
                (utterance_source, utterance_length),
                (self.output_length, transcript_length)
            ],
            get_ctc_loss
        ]

    else:
        raise ValueError('Unsupported backend "{}" for loss function "{}"'
            .format(backend_name, self.get_name()))