def plot_bboxes(bbox_plotter, model, input_image, iteration):
    """Save the regions extracted for one image together with the predicted text.

    Reads the outputs of ``model`` (via ``model.get_outputs()``), lets
    ``bbox_plotter`` turn the first two outputs into area data for a batch
    size of 1, decodes the third output into a string and passes everything
    to ``bbox_plotter.save_extracted_regions``.

    Args:
        bbox_plotter: object providing ``get_area_data`` and
            ``save_extracted_regions``.
        model: object whose ``get_outputs()`` returns at least three outputs.
        input_image: image forwarded unchanged to ``save_extracted_regions``.
        iteration: value forwarded unchanged to ``save_extracted_regions``.
    """
    model_outputs = model.get_outputs()
    transform_params, interpolated_areas = bbox_plotter.get_area_data(
        1,
        model_outputs[0],
        model_outputs[1],
    )

    # highest-scoring class per row of the third output, then blank stripping
    class_ids = np.argmax(model_outputs[2].asnumpy(), axis=1)
    kept_ids = strip_prediction(class_ids, args.blank_label)
    # char_map is keyed by the class id as a string and holds code points
    text = ''.join(chr(char_map[str(class_id)]) for class_id in kept_ids)

    bbox_plotter.save_extracted_regions(
        input_image,
        interpolated_areas.asnumpy(),
        transform_params.asnumpy(),
        iteration,
        text,
    )
def plot_bboxes(execution_params):
    """Run the network once on the fixed sample and save the extracted regions.

    A new executor is bound from ``stn_output`` with ``grad_req='null'``, the
    weights are copied into it from the training executor manager, and one
    forward pass is run on ``data`` tiled ``batch_size`` times. The results
    are handed to ``self.save_extracted_regions``.

    ``self``, ``stn_output``, ``data``, ``label``, ``num_data``, ``batch_num``
    and ``show_gt_bboxes`` are not parameters of this function; presumably
    they are captured from an enclosing method -- confirm against the
    surrounding code.

    Args:
        execution_params: callback argument exposing ``locals`` (read with the
            keys ``'train_data'``, ``'ctx'`` and ``'executor_manager'``),
            ``epoch`` and ``nbatch``.
    """
    train_iter = execution_params.locals['train_data']
    iters_per_epoch = num_data // train_iter.batch_size
    batch_size = batch_num if batch_num is not None else train_iter.batch_size

    # every input keeps its trailing dimensions but gets the chosen batch size
    input_shapes = {
        desc.name: (batch_size, ) + desc.shape[1:]
        for desc in train_iter.provide_data
    }
    input_shapes.update(
        (desc.name, (batch_size, ) + desc.shape[1:])
        for desc in train_iter.provide_label
    )

    executor = stn_output.simple_bind(
        execution_params.locals['ctx'][0],
        grad_req='null',
        **input_shapes)

    # set weights of executor
    training_executor = execution_params.locals['executor_manager']
    arg_arrays = executor.arg_dict
    self.copy_params(arg_arrays, training_executor, attr_name='param')
    aux_arrays = executor.aux_dict
    self.copy_params(aux_arrays, training_executor, attr_name='aux')

    arg_arrays['data'][:] = mx.nd.array(np.tile(data, (batch_size, 1, 1, 1)))
    arg_arrays['softmax_label'] = mx.nd.array(label)
    executor.forward(is_train=False)

    transform_params, interpolated_areas = self.get_area_data(
        batch_size,
        executor.outputs[0],
        executor.outputs[1],
    )

    labels = ''
    if self.show_labels:
        if self.label_majority_vote:
            class_ids = self.majority_vote(
                np.argmax(executor.outputs[2].asnumpy(), axis=1))
        else:
            class_ids = np.argmax(executor.outputs[2].asnumpy(), axis=1)
        class_ids = strip_prediction(class_ids, self.blank_label)
        labels = ''.join(chr(self.label_map[str(x)]) for x in class_ids)

    extra_transform_params = None
    extra_interpolated_areas = None
    if self.plot_extra_loc:
        raw_params = executor.outputs[3]
        size, num_params = raw_params.shape
        # regroup the rows as (size // batch_size, batch_size, ...) and keep
        # only the first entry along the batch axis
        extra_transform_params = mx.nd.transpose(
            raw_params.reshape((size // batch_size, batch_size, num_params)),
            axes=(1, 0, 2),
        )[0]

        raw_areas = executor.outputs[4]
        size, num_channels, height, width = raw_areas.shape
        extra_interpolated_areas = mx.nd.transpose(
            raw_areas.reshape(
                (size // batch_size, batch_size, num_channels, height, width)),
            axes=(1, 0, 2, 3, 4),
        )[0]

    gt_bboxes = None
    if show_gt_bboxes:
        # NOTE(review): `size` is only bound inside the `plot_extra_loc`
        # branch above (the source indentation was lost, so this nesting is
        # reconstructed) -- with show_gt_bboxes set and plot_extra_loc unset
        # this line would fail; confirm the intended source of the count.
        num_timesteps = size // batch_size
        # the last num_timesteps * 4 label entries are taken as the boxes
        gt_bboxes = np.split(label, [-(num_timesteps * 4)])[1]
        gt_bboxes = gt_bboxes.reshape(num_timesteps, 4)

    iteration = execution_params.epoch * iters_per_epoch + execution_params.nbatch

    if extra_transform_params is None:
        extra_params_array = None
    else:
        extra_params_array = extra_transform_params.asnumpy()
    if extra_interpolated_areas is None:
        extra_areas_array = None
    else:
        extra_areas_array = extra_interpolated_areas.asnumpy()

    self.save_extracted_regions(
        data,
        interpolated_areas.asnumpy(),
        transform_params.asnumpy(),
        iteration,
        labels,
        gt_bboxes=gt_bboxes,
        extra_transform_params=extra_params_array,
        extra_interpolated_areas=extra_areas_array,
    )
def plot_bboxes(execution_params):
    """Bind an inference executor, run the fixed sample and save the regions.

    The executor is created from ``stn_output`` with ``grad_req='null'``,
    filled with the weights held by the training executor manager, and run
    once on ``data`` tiled to ``batch_size``. The first two outputs go
    through ``self.get_area_data``; everything is then forwarded to
    ``self.save_extracted_regions``.

    ``self``, ``stn_output``, ``data``, ``label``, ``num_data``, ``batch_num``
    and ``show_gt_bboxes`` are free names here; presumably they come from an
    enclosing method -- verify against the surrounding code.

    Args:
        execution_params: callback argument exposing ``locals`` (read with the
            keys ``'train_data'``, ``'ctx'`` and ``'executor_manager'``),
            ``epoch`` and ``nbatch``.
    """
    data_source = execution_params.locals['train_data']
    iters_per_epoch = num_data // data_source.batch_size
    if batch_num is None:
        batch_size = data_source.batch_size
    else:
        batch_size = batch_num

    # shapes for binding: original trailing dimensions, chosen batch size
    bind_shapes = {}
    for description in data_source.provide_data:
        bind_shapes[description.name] = (batch_size, ) + description.shape[1:]
    for description in data_source.provide_label:
        bind_shapes[description.name] = (batch_size, ) + description.shape[1:]

    context = execution_params.locals['ctx'][0]
    executor = stn_output.simple_bind(context, grad_req='null', **bind_shapes)

    # set weights of executor
    source_executor = execution_params.locals['executor_manager']
    bound_args = executor.arg_dict
    self.copy_params(bound_args, source_executor, attr_name='param')
    bound_aux = executor.aux_dict
    self.copy_params(bound_aux, source_executor, attr_name='aux')

    tiled_input = np.tile(data, (batch_size, 1, 1, 1))
    bound_args['data'][:] = mx.nd.array(tiled_input)
    bound_args['softmax_label'] = mx.nd.array(label)
    executor.forward(is_train=False)

    transform_params, interpolated_areas = self.get_area_data(
        batch_size,
        executor.outputs[0],
        executor.outputs[1],
    )

    if not self.show_labels:
        labels = ''
    else:
        if self.label_majority_vote:
            voted = self.majority_vote(np.argmax(executor.outputs[2].asnumpy(), axis=1))
            kept = strip_prediction(voted, self.blank_label)
        else:
            kept = strip_prediction(
                np.argmax(executor.outputs[2].asnumpy(), axis=1),
                self.blank_label,
            )
        labels = ''.join(chr(self.label_map[str(x)]) for x in kept)

    extra_transform_params = None
    extra_interpolated_areas = None
    if self.plot_extra_loc:
        extra_transform_params = executor.outputs[3]
        size, num_params = extra_transform_params.shape
        grouped_shape = (size // batch_size, batch_size, num_params)
        # swap the first two axes and keep the first entry of the batch axis
        extra_transform_params = mx.nd.transpose(
            extra_transform_params.reshape(grouped_shape),
            axes=(1, 0, 2),
        )[0]

        extra_interpolated_areas = executor.outputs[4]
        size, num_channels, height, width = extra_interpolated_areas.shape
        grouped_shape = (size // batch_size, batch_size, num_channels, height, width)
        extra_interpolated_areas = mx.nd.transpose(
            extra_interpolated_areas.reshape(grouped_shape),
            axes=(1, 0, 2, 3, 4),
        )[0]

    gt_bboxes = None
    if show_gt_bboxes:
        # NOTE(review): `size` is assigned only when self.plot_extra_loc is
        # true (nesting reconstructed; the source indentation was lost) --
        # confirm that show_gt_bboxes is never used without plot_extra_loc.
        num_timesteps = size // batch_size
        # boxes are read from the final num_timesteps * 4 entries of label
        _, trailing = np.split(label, [-(num_timesteps * 4)])
        gt_bboxes = trailing.reshape(num_timesteps, 4)

    iteration = execution_params.epoch * iters_per_epoch + execution_params.nbatch

    optional_arrays = {
        'extra_transform_params': extra_transform_params,
        'extra_interpolated_areas': extra_interpolated_areas,
    }
    self.save_extracted_regions(
        data,
        interpolated_areas.asnumpy(),
        transform_params.asnumpy(),
        iteration,
        labels,
        gt_bboxes=gt_bboxes,
        extra_transform_params=(
            None if optional_arrays['extra_transform_params'] is None
            else optional_arrays['extra_transform_params'].asnumpy()
        ),
        extra_interpolated_areas=(
            None if optional_arrays['extra_interpolated_areas'] is None
            else optional_arrays['extra_interpolated_areas'].asnumpy()
        ),
    )
# Evaluate one image: load it as a grayscale array scaled by 1/255, run the
# model, decode the third model output into a word and compare it with
# gt_word.
# NOTE(review): the indentation of this snippet was lost; the statements are
# laid out flat. The final accuracy print may belong after an enclosing loop
# -- confirm against the full script.
the_image = Image.open(file_name)
the_image = the_image.convert('L')
# Image.LANCZOS is the same filter as the former Image.ANTIALIAS alias, which
# newer Pillow releases no longer provide.
the_image = the_image.resize((input_size.width, input_size.height), Image.LANCZOS)
# add two leading axes of length 1 in front of the image dimensions
image = np.asarray(the_image, dtype=np.float32)[np.newaxis, np.newaxis, ...]
image /= 255

input_batch = Batch(data=[mx.nd.array(image)], label=[mx.nd.array(label)])
model.forward(input_batch, is_train=False)

if args.plot:
    plot_bboxes(bbox_plotter, model, image, idx)

predictions = model.get_outputs()[2].asnumpy()
predicted_classes = np.argmax(predictions, axis=1)
# cut all word end predictions
predicted_classes = strip_prediction(predicted_classes, int(reverse_char_map[args.blank_symbol]))
predicted_word = ''.join(chr(char_map[str(p)]) for p in predicted_classes).replace(' ', '')

distance = editdistance.eval(gt_word, predicted_word)
print("{} - {}\t\t{}: {}".format(idx, gt_word, predicted_word, distance))

# zip() stops at the shorter sequence, so without the length check an empty
# prediction or a mere prefix of gt_word would be counted as correct.
is_correct = len(predicted_word) == len(gt_word) and all(
    predicted == expected for predicted, expected in zip(predicted_word, gt_word)
)
if is_correct:
    num_correct += 1
num_overall += 1

print("Accuracy: {}".format(num_correct / num_overall))
label=[mx.nd.array(labels)]) model.forward(input_batch, is_train=False) if args.plot: plot_bboxes(bbox_plotter, model, image, idx) # extract predictions from model predictions = model.get_outputs()[2].asnumpy() # get predicted classes predicted_classes = np.argmax(predictions, axis=1) # interpret batch or predictions as three different predictions (for each time step one prediction) predicted_classes = predicted_classes.reshape(3, -1) # strip blanks and double symbols from prediction predicted_classes = [ strip_prediction(predicted_classes[i], args.blank_label) for i in range(len(predicted_classes)) ] # concat groundtruth gt_words = [l for l in [label_1, label_2, label_3]] # convert groundtruth from labels to chars gt_words = [ ''.join([chr(char_map[str(p)]) for p in g]) for g in gt_words ] # strip blank labels from groundtruth gt_words = [ g.strip(chr(char_map[str(args.blank_label)])) for g in gt_words ] # convert predicted classes to characters predicted_words = [
# Run the model on the prepared image and compare the decoded words with the
# three ground-truth label sequences.
input_batch = Batch(
    data=[mx.nd.array(image)],
    label=[mx.nd.array(labels)],
)
model.forward(input_batch, is_train=False)

if args.plot:
    plot_bboxes(bbox_plotter, model, image, idx)

# extract predictions from model
predictions = model.get_outputs()[2].asnumpy()
# highest-scoring class per row, regrouped into three rows
# (one prediction for each time step)
predicted_classes = np.argmax(predictions, axis=1).reshape(3, -1)
# strip blanks and double symbols from prediction
predicted_classes = [
    strip_prediction(step_classes, args.blank_label)
    for step_classes in predicted_classes
]

# convert groundtruth from labels to chars
gt_words = [
    ''.join(chr(char_map[str(class_id)]) for class_id in word_labels)
    for word_labels in [label_1, label_2, label_3]
]
# strip blank labels from both ends of each groundtruth word
gt_words = [
    word.strip(chr(char_map[str(args.blank_label)]))
    for word in gt_words
]

# convert predicted classes to characters
predicted_words = [
    ''.join(chr(char_map[str(class_id)]) for class_id in word_classes)
    for word_classes in predicted_classes
]
model_predictions.append((file_name, predicted_words))

# eval predictions
for gt_word, predicted_word in zip(gt_words, predicted_words):
    distance = editdistance.eval(gt_word, predicted_word)
    print("{} - {}\t\t{}: {}".format(idx, gt_word, predicted_word, distance))
# Evaluate one already-loaded image: scale it by 1/255, run the model, decode
# the third model output into a word, record it and compare it with gt_word.
# add two leading axes of length 1 in front of the image dimensions
image = np.asarray(the_image, dtype=np.float32)[np.newaxis, np.newaxis, ...]
image /= 255

input_batch = Batch(data=[mx.nd.array(image)], label=[mx.nd.array(label)])
model.forward(input_batch, is_train=False)

if args.plot:
    plot_bboxes(bbox_plotter, model, image, idx)

predictions = model.get_outputs()[2].asnumpy()
predicted_classes = np.argmax(predictions, axis=1)
# cut all word end predictions
predicted_classes = strip_prediction(
    predicted_classes, int(reverse_char_map[args.blank_symbol]))
predicted_word = ''.join(
    chr(char_map[str(p)]) for p in predicted_classes
).replace(' ', '')
model_predictions.append((file_name, predicted_word))

distance = editdistance.eval(gt_word, predicted_word)
print("{} - {}\t\t{}: {}".format(idx, gt_word, predicted_word, distance))

# zip() stops at the shorter sequence, so without the length check an empty
# prediction or a mere prefix of gt_word would be counted as correct.
is_correct = len(predicted_word) == len(gt_word) and all(
    predicted == expected for predicted, expected in zip(predicted_word, gt_word)
)
if is_correct:
    num_correct += 1
num_overall += 1