class MakeMovie(BaseCommand):

    def parse_args(self, args):
        parser = argparse.ArgumentParser(prog='makemovie')
        parser.add_argument('--tub', help='The tub to make movie from')
        parser.add_argument('--out', default='tub_movie.mp4',
                            help='The movie filename to create. default: tub_movie.mp4')
        parser.add_argument('--config', default='./config.py',
                            help='location of config file to use. default: ./config.py')
        parser.add_argument('--model', default='None',
                            help='the model to use to show control outputs')
        parser.add_argument('--model_type', default='categorical',
                            help='the model type to load')
        parser.add_argument('--salient', action="store_true",
                            help='should we overlay salient map showing activations')
        parser.add_argument('--start', type=int, default=1,
                            help='first frame to process')
        parser.add_argument('--end', type=int, default=-1,
                            help='last frame to process')
        parser.add_argument('--scale', type=int, default=2,
                            help='make image frame output larger by X mult')
        parsed_args = parser.parse_args(args)
        return parsed_args, parser

    def run(self, args):
        '''
        Load the images from a tub and create a movie from them.
        '''
        import moviepy.editor as mpy

        args, parser = self.parse_args(args)

        if args.tub is None:
            parser.print_help()
            return

        if args.salient:
            # imported like this, we make TF conditional on use of --salient
            # and we keep the context maintained throughout our callbacks to
            # compute the salient mask
            from keras import backend as K
            import tensorflow as tf
            os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

        conf = os.path.expanduser(args.config)

        if not os.path.exists(conf):
            print("No config file at location: %s. Add --config to specify "
                  "location or run from dir containing config.py." % conf)
            return

        try:
            cfg = dk.load_config(conf)
        except:
            print("Exception while loading config from", conf)
            return

        self.tub = Tub(args.tub)
        self.num_rec = self.tub.get_num_records()

        if args.start == 1:
            self.start = self.tub.get_index(shuffled=False)[0]
        else:
            self.start = args.start

        if args.end != -1:
            self.end = args.end
        else:
            self.end = self.num_rec - self.start

        self.num_rec = self.end - self.start
        self.iRec = args.start
        self.scale = args.scale
        self.keras_part = None
        self.convolution_part = None

        if not args.model == "None":
            self.keras_part = get_model_by_type(args.model_type, cfg=cfg)
            self.keras_part.load(args.model)
            self.keras_part.compile()

            if args.salient:
                self.init_salient(self.keras_part.model)

                # This method nested in this way to take the conditional import of TF
                # in a manner that extends to this callback. Done this way, we avoid
                # importing in the below method, which triggers a new cuda device
                # allocation each call.
                def compute_visualisation_mask(img):
                    # from https://github.com/ermolenkodev/keras-salient-object-visualisation
                    activations = self.functor([np.array([img])])
                    activations = [np.reshape(img, (1, img.shape[0], img.shape[1], img.shape[2]))] + activations
                    upscaled_activation = np.ones((3, 6))
                    for layer in [5, 4, 3, 2, 1]:
                        averaged_activation = np.mean(activations[layer], axis=3).squeeze(axis=0) * upscaled_activation
                        output_shape = (activations[layer - 1].shape[1], activations[layer - 1].shape[2])
                        x = tf.constant(np.reshape(averaged_activation,
                                                   (1, averaged_activation.shape[0], averaged_activation.shape[1], 1)),
                                        tf.float32)
                        conv = tf.nn.conv2d_transpose(x, self.layers_kernels[layer],
                                                      output_shape=(1, output_shape[0], output_shape[1], 1),
                                                      strides=self.layers_strides[layer],
                                                      padding='VALID')
                        with tf.Session() as session:
                            result = session.run(conv)
                        upscaled_activation = np.reshape(result, output_shape)
                    final_visualisation_mask = upscaled_activation
                    return (final_visualisation_mask - np.min(final_visualisation_mask)) / (
                        np.max(final_visualisation_mask) - np.min(final_visualisation_mask))

                self.compute_visualisation_mask = compute_visualisation_mask

        print('making movie', args.out, 'from', self.num_rec, 'images')
        clip = mpy.VideoClip(self.make_frame, duration=(self.num_rec // cfg.DRIVE_LOOP_HZ) - 1)
        clip.write_videofile(args.out, fps=cfg.DRIVE_LOOP_HZ)

        print('done')

    def draw_model_prediction(self, record, img):
        '''
        query the model for its prediction, draw the user input
        and the predictions as green and blue lines on the image
        '''
        if self.keras_part is None:
            return

        import cv2

        user_angle = float(record["user/angle"])
        user_throttle = float(record["user/throttle"])
        pilot_angle, pilot_throttle = self.keras_part.run(img)

        a1 = user_angle * 45.0
        l1 = user_throttle * 3.0 * 80.0
        a2 = pilot_angle * 45.0
        l2 = pilot_throttle * 3.0 * 80.0

        p1 = tuple((74, 119))
        p2 = tuple((84, 119))
        p11 = tuple((int(p1[0] + l1 * math.cos((a1 + 270.0) * math.pi / 180.0)),
                     int(p1[1] + l1 * math.sin((a1 + 270.0) * math.pi / 180.0))))
        p22 = tuple((int(p2[0] + l2 * math.cos((a2 + 270.0) * math.pi / 180.0)),
                     int(p2[1] + l2 * math.sin((a2 + 270.0) * math.pi / 180.0))))

        cv2.line(img, p1, p11, (0, 255, 0), 2)
        cv2.line(img, p2, p22, (0, 0, 255), 2)

    def init_salient(self, model):
        # from https://github.com/ermolenkodev/keras-salient-object-visualisation
        from keras.layers import Input, Dense, merge
        from keras.models import Model
        from keras.layers import Convolution2D, MaxPooling2D, Reshape, BatchNormalization
        from keras.layers import Activation, Dropout, Flatten, Dense

        img_in = Input(shape=(120, 160, 3), name='img_in')
        x = img_in
        x = Convolution2D(24, (5, 5), strides=(2, 2), activation='relu', name='conv1')(x)
        x = Convolution2D(32, (5, 5), strides=(2, 2), activation='relu', name='conv2')(x)
        x = Convolution2D(64, (5, 5), strides=(2, 2), activation='relu', name='conv3')(x)
        x = Convolution2D(64, (3, 3), strides=(2, 2), activation='relu', name='conv4')(x)
        conv_5 = Convolution2D(64, (3, 3), strides=(1, 1), activation='relu', name='conv5')(x)
        self.convolution_part = Model(inputs=[img_in], outputs=[conv_5])

        for layer_num in ('1', '2', '3', '4', '5'):
            self.convolution_part.get_layer('conv' + layer_num).set_weights(
                model.get_layer('conv2d_' + layer_num).get_weights())

        from keras import backend as K
        import tensorflow as tf
        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

        self.inp = self.convolution_part.input  # input placeholder
        self.outputs = [layer.output for layer in self.convolution_part.layers[1:]]  # all layer outputs
        self.functor = K.function([self.inp], self.outputs)

        kernel_3x3 = tf.constant(np.array([
            [[[1]], [[1]], [[1]]],
            [[[1]], [[1]], [[1]]],
            [[[1]], [[1]], [[1]]]
        ]), tf.float32)

        kernel_5x5 = tf.constant(np.array([
            [[[1]], [[1]], [[1]], [[1]], [[1]]],
            [[[1]], [[1]], [[1]], [[1]], [[1]]],
            [[[1]], [[1]], [[1]], [[1]], [[1]]],
            [[[1]], [[1]], [[1]], [[1]], [[1]]],
            [[[1]], [[1]], [[1]], [[1]], [[1]]]
        ]), tf.float32)

        self.layers_kernels = {5: kernel_3x3, 4: kernel_3x3, 3: kernel_5x5, 2: kernel_5x5, 1: kernel_5x5}
        self.layers_strides = {5: [1, 1, 1, 1], 4: [1, 2, 2, 1], 3: [1, 2, 2, 1], 2: [1, 2, 2, 1], 1: [1, 2, 2, 1]}

    def draw_salient(self, img):
        # from https://github.com/ermolenkodev/keras-salient-object-visualisation
        import cv2
        alpha = 0.004
        beta = 1.0 - alpha

        salient_mask = self.compute_visualisation_mask(img)
        salient_mask_stacked = np.dstack((salient_mask, salient_mask))
        salient_mask_stacked = np.dstack((salient_mask_stacked, salient_mask))
        blend = cv2.addWeighted(img.astype('float32'), alpha, salient_mask_stacked, beta, 0.0)
        return blend

    def make_frame(self, t):
        '''
        Callback to return an image from our tub records.
        This is called from the VideoClip as it references a time.
        We don't use t to reference the frame, but instead increment
        a frame counter. This assumes sequential access.
        '''
        if self.iRec >= self.end:
            return None

        rec = None

        while rec is None and self.iRec < self.end:
            try:
                rec = self.tub.get_record(self.iRec)
            except Exception as e:
                print(e)
                print("Failed to get image for frame", self.iRec)
                self.iRec = self.iRec + 1
                rec = None

        image = rec['cam/image_array']

        if self.convolution_part:
            image = self.draw_salient(image)
            image = image * 255
            image = image.astype('uint8')

        self.draw_model_prediction(rec, image)

        if self.scale != 1:
            import cv2
            h, w, d = image.shape
            dsize = (w * self.scale, h * self.scale)
            image = cv2.resize(image, dsize=dsize, interpolation=cv2.INTER_CUBIC)

        self.iRec = self.iRec + 1

        # returns an 8-bit RGB array
        return image
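
# Illustrative sketch (not part of the original source): this version of the
# command parses its own argument list, so it can be exercised directly. The
# tub and model paths below are hypothetical; --model_type matches this
# version's parser.
def _example_make_movie_legacy():
    mm = MakeMovie()
    mm.run(['--tub', './data/tub_1', '--model', './models/pilot.h5',
            '--model_type', 'categorical', '--out', 'tub_movie.mp4'])
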
class MakeMovie(object):

    def __init__(self):
        self.deg_to_rad = math.pi / 180.0

    def run(self, args, parser):
        '''
        Load the images from a tub and create a movie from them.
        '''
        if args.tub is None:
            print("ERR>> --tub argument missing.")
            parser.print_help()
            return

        if args.type is None and args.model is not None:
            print("ERR>> --type argument missing. Required when providing a model.")
            parser.print_help()
            return

        if args.salient:
            if args.model is None:
                print("ERR>> salient visualization requires a model. Pass with the --model arg.")
                parser.print_help()

        conf = os.path.expanduser(args.config)
        if not os.path.exists(conf):
            print("No config file at location: %s. Add --config to specify "
                  "location or run from dir containing config.py." % conf)
            return

        self.cfg = dk.load_config(conf)
        self.tub = Tub(args.tub)
        self.index = self.tub.get_index(shuffled=False)
        start = args.start
        self.end = args.end if args.end != -1 else len(self.index)
        if self.end >= len(self.index):
            self.end = len(self.index) - 1
        num_frames = self.end - start
        self.iRec = start
        self.scale = args.scale
        self.keras_part = None
        self.do_salient = False

        if args.model is not None:
            self.keras_part = get_model_by_type(args.type, cfg=self.cfg)
            self.keras_part.load(args.model)
            self.keras_part.compile()
            if args.salient:
                self.do_salient = self.init_salient(self.keras_part.model)

        print('making movie', args.out, 'from', num_frames, 'images')
        clip = mpy.VideoClip(self.make_frame, duration=((num_frames - 1) / self.cfg.DRIVE_LOOP_HZ))
        clip.write_videofile(args.out, fps=self.cfg.DRIVE_LOOP_HZ)

    def draw_user_input(self, record, img):
        '''
        Draw the user input as a green line on the image
        '''
        import cv2

        user_angle = float(record["user/angle"])
        user_throttle = float(record["user/throttle"])

        height, width, _ = img.shape
        length = height
        a1 = user_angle * 45.0
        l1 = user_throttle * length
        mid = width // 2 - 1

        p1 = tuple((mid - 2, height - 1))
        p11 = tuple((int(p1[0] + l1 * math.cos((a1 + 270.0) * self.deg_to_rad)),
                     int(p1[1] + l1 * math.sin((a1 + 270.0) * self.deg_to_rad))))

        # user is green, pilot is blue
        cv2.line(img, p1, p11, (0, 255, 0), 2)

    def draw_model_prediction(self, record, img):
        '''
        query the model for its prediction, draw the predictions
        as a blue line on the image
        '''
        if self.keras_part is None:
            return

        import cv2

        expected = self.keras_part.model.inputs[0].shape[1:]
        actual = img.shape

        # normalize image before prediction
        pred_img = img.astype(np.float32) / 255.0

        # check input depth
        if expected[2] == 1 and actual[2] == 3:
            pred_img = rgb2gray(pred_img)
            pred_img = pred_img.reshape(pred_img.shape + (1,))
            actual = pred_img.shape

        if expected != actual:
            print("expected input dim", expected, "didn't match actual dim", actual)
            return

        pilot_angle, pilot_throttle = self.keras_part.run(pred_img)

        height, width, _ = pred_img.shape
        length = height
        a2 = pilot_angle * 45.0
        l2 = pilot_throttle * length
        mid = width // 2 - 1

        p2 = tuple((mid + 2, height - 1))
        p22 = tuple((int(p2[0] + l2 * math.cos((a2 + 270.0) * self.deg_to_rad)),
                     int(p2[1] + l2 * math.sin((a2 + 270.0) * self.deg_to_rad))))

        # user is green, pilot is blue
        cv2.line(img, p2, p22, (0, 0, 255), 2)

    def draw_steering_distribution(self, record, img):
        '''
        query the model for its prediction, draw the distribution of steering choices
        '''
        from donkeycar.parts.keras import KerasCategorical

        if self.keras_part is None or type(self.keras_part) is not KerasCategorical:
            return

        import cv2

        pred_img = img.reshape((1,) + img.shape)
        angle_binned, _ = self.keras_part.model.predict(pred_img)

        x = 4
        dx = 4
        y = 120 - 4
        iArgMax = np.argmax(angle_binned)
        for i in range(15):
            p1 = (x, y)
            p2 = (x, y - int(angle_binned[0][i] * 100.0))
            if i == iArgMax:
                cv2.line(img, p1, p2, (255, 0, 0), 2)
            else:
                cv2.line(img, p1, p2, (200, 200, 200), 2)
            x += dx

    def init_salient(self, model):
        # Utility to search for layer index by name.
        # Alternatively we can specify this as -1 since it corresponds to the last layer.
        first_output_name = None
        for i, layer in enumerate(model.layers):
            if first_output_name is None and "dropout" not in layer.name.lower() and "out" in layer.name.lower():
                first_output_name = layer.name
                layer_idx = i

        if first_output_name is None:
            print("Failed to find the model layer named with 'out'. Skipping salient.")
            return False

        print("####################")
        print("Visualizing activations on layer:", first_output_name)
        print("####################")

        # ensure we have linear activation
        model.layers[layer_idx].activation = activations.linear
        # build salient model and optimizer
        sal_model = utils.apply_modifications(model)
        modifier_fn = get('guided')
        sal_model_mod = modifier_fn(sal_model)
        losses = [
            (ActivationMaximization(sal_model_mod.layers[layer_idx], None), -1)
        ]
        self.opt = Optimizer(sal_model_mod.input, losses, norm_grads=False)
        return True

    def compute_visualisation_mask(self, img):
        grad_modifier = 'absolute'
        grads = self.opt.minimize(seed_input=img, max_iter=1,
                                  grad_modifier=grad_modifier, verbose=False)[1]
        channel_idx = 1 if K.image_data_format() == 'channels_first' else -1
        grads = np.max(grads, axis=channel_idx)
        res = utils.normalize(grads)[0]
        return res

    def draw_salient(self, img):
        import cv2
        alpha = 0.004
        beta = 1.0 - alpha

        expected = self.keras_part.model.inputs[0].shape[1:]
        actual = img.shape
        pred_img = img.astype(np.float32) / 255.0

        # check input depth
        if expected[2] == 1 and actual[2] == 3:
            pred_img = rgb2gray(pred_img)
            pred_img = pred_img.reshape(pred_img.shape + (1,))

        salient_mask = self.compute_visualisation_mask(pred_img)
        z = np.zeros_like(salient_mask)
        salient_mask_stacked = np.dstack((z, z))
        salient_mask_stacked = np.dstack((salient_mask_stacked, salient_mask))
        blend = cv2.addWeighted(img.astype('float32'), alpha, salient_mask_stacked, beta, 0.0)
        return blend

    def make_frame(self, t):
        '''
        Callback to return an image from our tub records.
        This is called from the VideoClip as it references a time.
        We don't use t to reference the frame, but instead increment
        a frame counter. This assumes sequential access.
        '''
        if self.iRec >= self.end or self.iRec >= len(self.index):
            return None

        rec_ix = self.index[self.iRec]
        rec = self.tub.get_record(rec_ix)
        image = rec['cam/image_array']

        if self.cfg.ROI_CROP_TOP != 0 or self.cfg.ROI_CROP_BOTTOM != 0:
            image = img_crop(image, self.cfg.ROI_CROP_TOP, self.cfg.ROI_CROP_BOTTOM)

        if self.do_salient:
            image = self.draw_salient(image)
            image = image * 255
            image = image.astype('uint8')

        self.draw_user_input(rec, image)

        if self.keras_part is not None:
            self.draw_model_prediction(rec, image)
            self.draw_steering_distribution(rec, image)

        if self.scale != 1:
            import cv2
            h, w, d = image.shape
            dsize = (w * self.scale, h * self.scale)
            image = cv2.resize(image, dsize=dsize, interpolation=cv2.INTER_CUBIC)

        self.iRec += 1

        # returns an 8-bit RGB array
        return image
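
# Hedged sketch (not part of the original source): unlike the other variants,
# this MakeMovie.run() expects pre-parsed args plus the parser itself, which is
# normally supplied by the makemovie command wrapper. A minimal stand-in parser
# with the same option names is shown here; paths passed in arg_list would be
# caller-supplied.
def _example_run_movie_maker(arg_list):
    parser = argparse.ArgumentParser(prog='makemovie')
    parser.add_argument('--tub')
    parser.add_argument('--out', default='tub_movie.mp4')
    parser.add_argument('--config', default='./config.py')
    parser.add_argument('--model')
    parser.add_argument('--type')
    parser.add_argument('--salient', action='store_true')
    parser.add_argument('--start', type=int, default=1)
    parser.add_argument('--end', type=int, default=-1)
    parser.add_argument('--scale', type=int, default=2)
    MakeMovie().run(parser.parse_args(arg_list), parser)
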
class MakeMovie(BaseCommand):

    def __init__(self):
        self.deg_to_rad = math.pi / 180.0

    def parse_args(self, args):
        parser = argparse.ArgumentParser(prog='makemovie')
        parser.add_argument('--tub', help='The tub to make movie from')
        parser.add_argument('--out', default='tub_movie.mp4',
                            help='The movie filename to create. default: tub_movie.mp4')
        parser.add_argument('--config', default='./config.py',
                            help='location of config file to use. default: ./config.py')
        parser.add_argument('--model', help='the model to use to show control outputs')
        parser.add_argument('--type', help='the model type to load')
        parser.add_argument('--salient', action="store_true",
                            help='should we overlay salient map showing activations')
        parser.add_argument('--start', type=int, default=1,
                            help='first frame to process')
        parser.add_argument('--end', type=int, default=-1,
                            help='last frame to process')
        parser.add_argument('--scale', type=int, default=2,
                            help='make image frame output larger by X mult')
        parsed_args = parser.parse_args(args)
        return parsed_args, parser

    def run(self, args):
        '''
        Load the images from a tub and create a movie from them.
        '''
        import moviepy.editor as mpy

        args, parser = self.parse_args(args)

        if args.tub is None:
            print("ERR>> --tub argument missing.")
            parser.print_help()
            return

        if args.model is not None and args.type is None:
            print("ERR>> --type argument missing.")
            parser.print_help()
            return

        if args.salient:
            if args.model is None:
                print("ERR>> salient visualization requires a model. Pass with the --model arg.")
                parser.print_help()
                return

            # imported like this, we make TF conditional on use of --salient
            # and we keep the context maintained throughout our callbacks to
            # compute the salient mask
            from tensorflow.python.keras import backend as K
            import tensorflow as tf
            os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

        conf = os.path.expanduser(args.config)

        if not os.path.exists(conf):
            print("No config file at location: %s. Add --config to specify "
                  "location or run from dir containing config.py." % conf)
            return

        self.cfg = dk.load_config(conf)
        self.tub = Tub(args.tub)
        self.index = self.tub.get_index(shuffled=False)
        start = args.start
        self.end = args.end if args.end != -1 else len(self.index)
        if self.end >= len(self.index):
            self.end = len(self.index) - 1
        num_frames = self.end - start
        self.iRec = start
        self.scale = args.scale
        self.keras_part = None
        self.convolution_part = None

        if args.model is not None:
            self.keras_part = get_model_by_type(args.type, cfg=self.cfg)
            self.keras_part.load(args.model)
            self.keras_part.compile()

            if args.salient:
                self.init_salient(self.keras_part.model)

                # This method nested in this way to take the conditional import of TF
                # in a manner that extends to this callback. Done this way, we avoid
                # importing in the below method, which triggers a new cuda device
                # allocation each call.
                def compute_visualisation_mask(img):
                    # from https://github.com/ermolenkodev/keras-salient-object-visualisation
                    activations = self.functor([np.array([img])])
                    activations = [np.reshape(img, (1, img.shape[0], img.shape[1], img.shape[2]))] + activations
                    upscaled_activation = np.ones((3, 6))
                    for layer in [5, 4, 3, 2, 1]:
                        averaged_activation = np.mean(activations[layer], axis=3).squeeze(axis=0) * upscaled_activation
                        output_shape = (activations[layer - 1].shape[1], activations[layer - 1].shape[2])
                        x = tf.constant(np.reshape(averaged_activation,
                                                   (1, averaged_activation.shape[0], averaged_activation.shape[1], 1)),
                                        tf.float32)
                        conv = tf.nn.conv2d_transpose(x, self.layers_kernels[layer],
                                                      output_shape=(1, output_shape[0], output_shape[1], 1),
                                                      strides=self.layers_strides[layer],
                                                      padding='VALID')
                        with tf.Session() as session:
                            result = session.run(conv)
                        upscaled_activation = np.reshape(result, output_shape)
                    final_visualisation_mask = upscaled_activation
                    return (final_visualisation_mask - np.min(final_visualisation_mask)) / (
                        np.max(final_visualisation_mask) - np.min(final_visualisation_mask))

                self.compute_visualisation_mask = compute_visualisation_mask

        print('making movie', args.out, 'from', num_frames, 'images')
        clip = mpy.VideoClip(self.make_frame, duration=((num_frames - 1) / self.cfg.DRIVE_LOOP_HZ))
        clip.write_videofile(args.out, fps=self.cfg.DRIVE_LOOP_HZ)

    def draw_model_prediction(self, record, img):
        '''
        query the model for its prediction, draw the user input
        and the predictions as green and blue lines on the image
        '''
        if self.keras_part is None:
            return

        import cv2

        user_angle = float(record["user/angle"])
        user_throttle = float(record["user/throttle"])

        expected = self.keras_part.model.inputs[0].shape[1:]
        actual = img.shape
        pred_img = img

        # check input depth
        if expected[2] == 1 and actual[2] == 3:
            pred_img = rgb2gray(pred_img)
            pred_img = pred_img.reshape(pred_img.shape + (1,))
            actual = pred_img.shape

        if expected != actual:
            print("expected input dim", expected, "didn't match actual dim", actual)
            return

        pilot_angle, pilot_throttle = self.keras_part.run(pred_img)

        length = self.cfg.IMAGE_H
        a1 = user_angle * 45.0
        l1 = user_throttle * length
        a2 = pilot_angle * 45.0
        l2 = pilot_throttle * length
        mid = self.cfg.IMAGE_W // 2 - 1

        p1 = tuple((mid - 2, self.cfg.IMAGE_H - 1))
        p2 = tuple((mid + 2, self.cfg.IMAGE_H - 1))
        p11 = tuple((int(p1[0] + l1 * math.cos((a1 + 270.0) * self.deg_to_rad)),
                     int(p1[1] + l1 * math.sin((a1 + 270.0) * self.deg_to_rad))))
        p22 = tuple((int(p2[0] + l2 * math.cos((a2 + 270.0) * self.deg_to_rad)),
                     int(p2[1] + l2 * math.sin((a2 + 270.0) * self.deg_to_rad))))

        # user is green, pilot is blue
        cv2.line(img, p1, p11, (0, 255, 0), 2)
        cv2.line(img, p2, p22, (0, 0, 255), 2)

    def draw_steering_distribution(self, record, img):
        '''
        query the model for its prediction, draw the distribution of steering choices
        '''
        from donkeycar.parts.keras import KerasCategorical

        if self.keras_part is None or type(self.keras_part) is not KerasCategorical:
            return

        import cv2

        pred_img = img.reshape((1,) + img.shape)
        angle_binned, throttle = self.keras_part.model.predict(pred_img)

        x = 4
        dx = 4
        y = 120 - 4
        iArgMax = np.argmax(angle_binned)
        for i in range(15):
            p1 = (x, y)
            p2 = (x, y - int(angle_binned[0][i] * 100.0))
            if i == iArgMax:
                cv2.line(img, p1, p2, (255, 0, 0), 2)
            else:
                cv2.line(img, p1, p2, (200, 200, 200), 2)
            x += dx

    def init_salient(self, model):
        # from https://github.com/ermolenkodev/keras-salient-object-visualisation
        from tensorflow.python.keras.layers import Input, Dense, merge
        from tensorflow.python.keras.models import Model
        from tensorflow.python.keras.layers import Convolution2D, MaxPooling2D, Reshape, BatchNormalization
        from tensorflow.python.keras.layers import Activation, Dropout, Flatten, Dense

        input_shape = model.inputs[0].shape

        img_in = Input(shape=(input_shape[1], input_shape[2], input_shape[3]), name='img_in')
        x = img_in
        x = Convolution2D(24, (5, 5), strides=(2, 2), activation='relu', name='conv1')(x)
        x = Convolution2D(32, (5, 5), strides=(2, 2), activation='relu', name='conv2')(x)
        x = Convolution2D(64, (5, 5), strides=(2, 2), activation='relu', name='conv3')(x)
        x = Convolution2D(64, (3, 3), strides=(2, 2), activation='relu', name='conv4')(x)
        conv_5 = Convolution2D(64, (3, 3), strides=(1, 1), activation='relu', name='conv5')(x)
        self.convolution_part = Model(inputs=[img_in], outputs=[conv_5])

        for layer_num in ('1', '2', '3', '4', '5'):
            try:
                self.convolution_part.get_layer('conv' + layer_num).set_weights(
                    model.get_layer('conv2d_' + layer_num).get_weights())
            except Exception as e:
                print(e)
                print("Failed to load layer weights for layer", layer_num)
                raise Exception("Failed to load weights")

        from tensorflow.python.keras import backend as K
        import tensorflow as tf
        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

        self.inp = self.convolution_part.input  # input placeholder
        self.outputs = [layer.output for layer in self.convolution_part.layers[1:]]  # all layer outputs
        self.functor = K.function([self.inp], self.outputs)

        kernel_3x3 = tf.constant(np.array([
            [[[1]], [[1]], [[1]]],
            [[[1]], [[1]], [[1]]],
            [[[1]], [[1]], [[1]]]
        ]), tf.float32)

        kernel_5x5 = tf.constant(np.array([
            [[[1]], [[1]], [[1]], [[1]], [[1]]],
            [[[1]], [[1]], [[1]], [[1]], [[1]]],
            [[[1]], [[1]], [[1]], [[1]], [[1]]],
            [[[1]], [[1]], [[1]], [[1]], [[1]]],
            [[[1]], [[1]], [[1]], [[1]], [[1]]]
        ]), tf.float32)

        self.layers_kernels = {5: kernel_3x3, 4: kernel_3x3, 3: kernel_5x5, 2: kernel_5x5, 1: kernel_5x5}
        self.layers_strides = {5: [1, 1, 1, 1], 4: [1, 2, 2, 1], 3: [1, 2, 2, 1], 2: [1, 2, 2, 1], 1: [1, 2, 2, 1]}

    def draw_salient(self, img):
        # from https://github.com/ermolenkodev/keras-salient-object-visualisation
        import cv2
        alpha = 0.004
        beta = 1.0 - alpha

        expected = self.keras_part.model.inputs[0].shape[1:]
        actual = img.shape
        pred_img = img

        # check input depth
        if expected[2] == 1 and actual[2] == 3:
            pred_img = rgb2gray(pred_img)
            pred_img = pred_img.reshape(pred_img.shape + (1,))
            actual = pred_img.shape

        salient_mask = self.compute_visualisation_mask(pred_img)
        salient_mask_stacked = np.dstack((salient_mask, salient_mask))
        salient_mask_stacked = np.dstack((salient_mask_stacked, salient_mask))
        blend = cv2.addWeighted(img.astype('float32'), alpha, salient_mask_stacked, beta, 0.0)
        return blend

    def make_frame(self, t):
        '''
        Callback to return an image from our tub records.
        This is called from the VideoClip as it references a time.
        We don't use t to reference the frame, but instead increment
        a frame counter. This assumes sequential access.
        '''
        if self.iRec >= self.end or self.iRec >= len(self.index):
            return None

        rec_ix = self.index[self.iRec]
        rec = self.tub.get_record(rec_ix)
        image = rec['cam/image_array']

        if self.convolution_part:
            image = self.draw_salient(image)
            image = image * 255
            image = image.astype('uint8')

        self.draw_model_prediction(rec, image)
        self.draw_steering_distribution(rec, image)

        if self.scale != 1:
            import cv2
            h, w, d = image.shape
            dsize = (w * self.scale, h * self.scale)
            image = cv2.resize(image, dsize=dsize, interpolation=cv2.INTER_CUBIC)

        self.iRec += 1

        # returns an 8-bit RGB array
        return image
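
# Illustrative sketch (not part of the original source): driving this version of
# the command directly with the same argument strings its parser defines. The
# tub and model paths are hypothetical examples.
def _example_make_movie():
    mm = MakeMovie()
    mm.run(['--tub', './data/tub_1', '--model', './models/pilot.h5',
            '--type', 'categorical', '--out', 'tub_movie.mp4', '--scale', '2'])
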
class TubFormat(DriveFormat):
    """ A class to represent a DonkeyCar Tub drive on disc.

        Current assumptions:
            Tub records are 1 indexed and sequential with no gaps.
            We only care about editing steering and throttle.
            Steering and throttle should be clipped to -1/1.

        TODO: Change actions to be a dictionary of dictionaries, with the outer
              key being a record's real index. Images would need to be included
              in that (which is how the Tub class does it).
    """

    def __init__(self, path):
        DriveFormat.__init__(self)

        if not os.path.exists(path):
            raise IOError("TubFormat directory does not exist: {}".format(path))
        if not os.path.isdir(path):
            raise IOError("TubFormat path is not a directory: {}".format(path))

        self.path = path
        self.tub = Tub(path)
        self.meta = self.tub.meta
        self.edit_list = set()
        self.shape = None
        self.auxMeta = {}
        self.aux_clean = True
        #(self.images, self.actions) = self._load(path)

    def _load(self, path, image_norm=True, progress=None):
        records = {}
        indexes = self.tub.get_index(shuffled=False)
        for idx in indexes:
            rec = self.tub.get_record(idx)
            if self.shape is None:
                self.shape = rec['cam/image_array'].shape
            records[idx] = rec
        self.records = records
        self.indexes = indexes

        if 'auxiliary' in self.tub.meta:
            self.auxMeta = self.tub.meta['auxiliary']

        # "sim/info": {"done": true, "pos": [32.82384, 5.567082, -9.720116], "reward": -1.0, "hit": "none", "cte": 2.948259, "speed": 9.52644}
        if 'sim/info' in self.tub.meta['inputs']:
            self.auxMeta['done'] = {"name": "done", "type": "categorical", "categories": ["True", "False"]}
            self.auxMeta['reward'] = {"name": "reward", "type": "continuous"}

    def load(self, progress=None):
        self._load(self.path, progress=progress)
        self.setClean()

    def save(self):
        if not self.aux_clean:
            # update meta with new aux meta and write it out
            for name, aux in self.auxMeta.items():
                if name not in self.tub.meta['inputs']:
                    self.tub.meta['inputs'].append(name)
                    if 'continuous' == aux['type']:
                        aux_type = 'float'
                    elif 'categorical' == aux['type']:
                        aux_type = 'int'
                    else:
                        raise ValueError("Unknown auxiliary data type: {}".format(aux['type']))
                    self.tub.meta['types'].append(aux_type)
            self.tub.meta['auxiliary'] = self.auxMeta
            with open(self.tub.meta_path, 'w') as f:
                json.dump(self.tub.meta, f)
            self.aux_clean = True

        if self.isClean():
            return

        self.tub.write_exclude()

        for ix in self.edit_list:
            rec = self.records[ix]
            path = self.tub.get_json_record_path(ix)
            try:
                with open(path, 'r') as fp:
                    old_rec = json.load(fp)
            except TypeError:
                print('troubles with record:', path)
            except FileNotFoundError:
                raise
            except:
                print("Unexpected error:", sys.exc_info()[0])
                raise

            # Copy over only the keys we might have modified
            chg_keys = ['user/angle', 'user/throttle', 'orig/angle', 'orig/throttle']
            for key in chg_keys:
                if key in rec:
                    old_rec[key] = rec[key]

            # Now do any auxiliary data
            for key in self.auxMeta.keys():
                if key in rec:
                    if rec[key] is None:
                        old_rec.pop(key, None)
                    else:
                        #if self.auxMeta[key]['type'] == "categorical":
                        #    val = self.auxMeta[key]['categories'].index(rec[key])
                        #else:
                        #    val = rec[key]
                        old_rec[key] = rec[key]

            try:
                with open(path, 'w') as fp:
                    json.dump(old_rec, fp)
            except TypeError:
                print('troubles with record:', path)
            except FileNotFoundError:
                raise
            except:
                print("Unexpected error:", sys.exc_info()[0])
                raise

        self.edit_list.clear()
        self.setClean()

    def count(self):
        return len(self.records)

    def imageForIndex(self, index):
        idx = self.indexes[index]
        img = self.records[idx]['cam/image_array']
        if self.tub.excluded(index + 1):
            # This grayed out image ends up looking ugly, can't figure out why
            tmp = img.mean(axis=-1, dtype=img.dtype, keepdims=False)
            tmp = np.repeat(tmp[:, :, np.newaxis], 3, axis=2)
            return tmp
        return img

    def actionForIndex(self, index):
        idx = self.indexes[index]
        rec = self.records[idx]
        angle, throttle = Tub.get_angle_throttle(rec)
        return [angle, throttle]

    def setActionForIndex(self, new_action, index):
        idx = self.indexes[index]
        rec = self.records[idx]
        angle, throttle = Tub.get_angle_throttle(rec)
        old_action = [angle, throttle]
        if not np.array_equal(old_action, new_action):
            if (rec["user/angle"] != new_action[0]) or (rec["user/throttle"] != new_action[1]):
                # Save the original values if not already done
                if "orig/angle" not in rec:
                    rec["orig/angle"] = rec["user/angle"]
                if "orig/throttle" not in rec:
                    rec["orig/throttle"] = rec["user/throttle"]
                rec["user/angle"] = new_action[0]
                rec["user/throttle"] = new_action[1]
                self.edit_list.add(idx)
                self.setDirty()

    def actionForKey(self, keybind, oldAction=None):
        oldAction = copy.copy(oldAction)
        if keybind == 'w':
            oldAction[1] += 0.1
        elif keybind == 'x':
            oldAction[1] -= 0.1
        elif keybind == 'a':
            oldAction[0] -= 0.1
        elif keybind == 'd':
            oldAction[0] += 0.1
        elif keybind == 's':
            oldAction[0] = 0.0
            oldAction[1] = 0.0
        else:
            return None
        return np.clip(oldAction, -1.0, 1.0)

    def deleteIndex(self, index):
        if index >= 0 and index < self.count():
            index += 1
            if self.tub.excluded(index):
                self.tub.include_index(index)
            else:
                self.tub.exclude_index(index)
            self.setDirty()

    def isIndexDeleted(self, index):
        if index >= 0 and index < self.count():
            index += 1
            return self.tub.excluded(index)
        return False

    def metaString(self):
        #{"inputs": ["cam/image_array", "user/angle", "user/throttle", "user/mode"], "start": 1550950724.8622544, "types": ["image_array", "float", "float", "str"]}
        ret = ""
        for k, v in self.tub.meta.items():
            ret += "{}: {}\n".format(k, v)
        return ret

    def actionStats(self):
        stats = defaultdict(int)
        if self.count() > 0:
            actions = []
            for i in range(self.count()):
                act = self.actionForIndex(i)
                actions.append(act)
            stats["Min"] = np.min(actions)
            stats["Max"] = np.max(actions)
            stats["Mean"] = np.mean(actions)
            stats["StdDev"] = np.std(actions)
        return stats

    def supportsAuxData(self):
        return True

    def getAuxMeta(self):
        return self.auxMeta

    def addAuxData(self, meta):
        # TODO Check to make sure the meta data is all the same
        if meta["name"] not in self.auxMeta:
            self.auxMeta[meta["name"]] = meta
            self.aux_clean = False

    def auxDataAtIndex(self, auxName, index):
        if not auxName in self.auxMeta:
            return None
        idx = self.indexes[index]
        rec = self.records[idx]
        if auxName in rec:
            if rec[auxName] is not None and self.auxMeta[auxName]['type'] == "categorical":
                return self.auxMeta[auxName]['categories'][rec[auxName]]
            return rec[auxName]
        elif 'sim/info' in rec and auxName in rec['sim/info']:
            rec = rec['sim/info']
            if rec[auxName] is not None and self.auxMeta[auxName]['type'] == "categorical":
                return str(rec[auxName])
            return rec[auxName]
        return None

    def setAuxDataAtIndex(self, auxName, auxData, index):
        if not auxName in self.auxMeta:
            return False
        idx = self.indexes[index]
        rec = self.records[idx]
        if auxName not in rec or rec[auxName] != auxData:
            if auxData is not None and self.auxMeta[auxName]['type'] == "categorical":
                auxData = self.auxMeta[auxName]['categories'].index(auxData)
            rec[auxName] = auxData
            self.edit_list.add(idx)
            self.setDirty()
        return True

    @classmethod
    def canOpenFile(cls, path):
        if not os.path.exists(path):
            return False

        if not os.path.isdir(path):
            return False

        meta_file = os.path.join(path, "meta.json")
        if not os.path.exists(meta_file):
            return False

        #if os.path.splitext(path)[1] == ".tub":
        #    return True

        return True

    @staticmethod
    def defaultInputTypes():
        return [{"name": "Images", "type": "numpy image", "shape": (120, 160, 3)}]

    def inputTypes(self):
        res = TubFormat.defaultInputTypes()
        if self.shape is not None:
            res[0]["shape"] = self.shape
        return res

    @staticmethod
    def defaultOutputTypes():
        return [{"name": "Actions", "type": "continuous", "range": (-1.0, 1.0)}]

    def outputTypes(self):
        res = []
        for act in ["user/angle", "user/throttle"]:
            display_name = act.split("/")[1]
            res.append({"name": display_name, "type": "continuous", "range": (-1.0, 1.0)})
        return res
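
# Illustrative sketch (not part of the original source): how an editing tool
# might drive TubFormat. The tub path is a hypothetical example; setClean,
# setDirty and Tub.get_angle_throttle are assumed to be provided by
# DriveFormat/Tub exactly as they are used in the class above.
def _example_edit_tub(path='./data/tub_1'):
    if not TubFormat.canOpenFile(path):
        return
    fmt = TubFormat(path)
    fmt.load()
    angle, throttle = fmt.actionForIndex(0)
    fmt.setActionForIndex([0.0, throttle], 0)  # zero the steering on the first record
    fmt.save()
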