def predict_on_image(self, image_path, output_dir=None):
    """
    Detect, draw detections and save result to output folder.

    Args:
        image_path: Path to image.
        output_dir: Path to output dir, defaults to output/detections

    Returns:
        Output path.
    """
    image_path = get_abs_path(image_path, verify=True)
    image_name = os.path.basename(image_path)
    with open(image_path, 'rb') as image_file:
        image_data = tf.image.decode_image(image_file.read(), channels=3)
    detections, adjusted = self.detect_image(image_data, image_name)
    self.draw_on_image(adjusted, detections)
    output_dir = output_dir or get_abs_path('output', 'detections')
    output_path = get_abs_path(output_dir,
                               f'predicted-{image_name}',
                               create_parents=True)
    cv2.imwrite(output_path, adjusted)
    return output_path
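# Usage sketch (non-authoritative; paths are hypothetical and the project's
# own imports are assumed). `Detector` and its arguments mirror the `detect`
# CLI entry point further below:
detector = Detector(
    input_shape=(416, 416, 3),
    model_configuration='yolov4.cfg',  # hypothetical config path
    classes_file='coco.names',  # hypothetical classes file
)
detector.create_models(reverse_v4=True)
detector.load_weights('models/trained_model.tf')  # hypothetical checkpoint
saved_path = detector.predict_on_image('samples/sample_image.png')
# -> .../output/detections/predicted-sample_image.png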
def visualize_boxes(relative_anchors, sample_image=None, save_result=True):
    """
    Visualize anchor boxes output of k-means.

    Args:
        relative_anchors: Output of k-means.
        sample_image: Path to image to display as background.
        save_result: If True, figure will be saved.

    Returns:
        None
    """
    title = 'Generated anchors relative to sample image size'
    if os.path.exists(get_abs_path('output', 'plots', f'{title}.png')):
        return
    width, height = imagesize.get(sample_image)
    img = cv2.imread(get_abs_path(sample_image))
    if img is None:
        img = np.ones((height, width, 3), dtype=np.uint8) * 255
    center = int(width / 2), int(height / 2)
    for relative_w, relative_h in relative_anchors:
        box_width = relative_w * width
        box_height = relative_h * height
        x0 = int(center[0] - (box_width / 2))
        y0 = int(center[1] - (box_height / 2))
        x1 = int(x0 + box_width)
        y1 = int(y0 + box_height)
        cv2.rectangle(img, (x0, y0), (x1, y1), (255, 0, 0), 4)
    plt.imshow(img)
    plt.grid()
    plt.title(title)
    save_fig(title, save_result)
def adjust_non_voc_csv(csv_file, image_path, image_width, image_height):
    """
    Read relative data and return adjusted frame accordingly.

    Args:
        csv_file: .csv file containing the following columns:
            [image, object_name, object_index, bx, by, bw, bh]
        image_path: Path prefix to be added.
        image_width: Image width.
        image_height: Image height.

    Returns:
        pandas DataFrame with the following columns:
        ['image_path', 'object_name', 'img_width', 'img_height',
         'x_min', 'y_min', 'x_max', 'y_max', 'relative_width',
         'relative_height', 'object_id']
    """
    image_path = get_abs_path(image_path, verify=True)
    coordinates = []
    old_frame = pd.read_csv(get_abs_path(csv_file, verify=True))
    new_frame = pd.DataFrame()
    new_frame['image_path'] = old_frame['image'].apply(
        lambda item: get_abs_path(image_path, item))
    new_frame['object_name'] = old_frame['object_name']
    new_frame['img_width'] = image_width
    new_frame['img_height'] = image_height
    new_frame['relative_width'] = old_frame['bw']
    new_frame['relative_height'] = old_frame['bh']
    new_frame['object_id'] = old_frame['object_index'] + 1
    for index, row in old_frame.iterrows():
        image, object_name, object_index, bx, by, bw, bh = row
        co = ratios_to_coordinates(bx, by, bw, bh, image_width, image_height)
        coordinates.append(co)
    (
        new_frame['x_min'],
        new_frame['y_min'],
        new_frame['x_max'],
        new_frame['y_max'],
    ) = np.array(coordinates).T
    new_frame[['x_min', 'y_min', 'x_max', 'y_max']] = new_frame[[
        'x_min', 'y_min', 'x_max', 'y_max'
    ]].astype('int64')
    print(f'Parsed labels:\n{new_frame["object_name"].value_counts()}')
    classes = new_frame['object_name'].drop_duplicates()
    LOGGER.info(
        f'Adjustment from existing received {len(new_frame)} labels '
        f'containing {len(classes)} classes')
    LOGGER.info(f'Added prefix to images: {image_path}')
    return new_frame[[
        'image_path',
        'object_name',
        'img_width',
        'img_height',
        'x_min',
        'y_min',
        'x_max',
        'y_max',
        'relative_width',
        'relative_height',
        'object_id',
    ]]
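# For illustration, a relative-labels csv consumed by this function might
# look like the following (file name and values are hypothetical; bx, by,
# bw and bh are the relative box parameters converted by
# ratios_to_coordinates):
#
#   image,object_name,object_index,bx,by,bw,bh
#   img_001.png,car,0,0.52,0.48,0.30,0.20
#   img_001.png,person,1,0.15,0.60,0.08,0.35
adjusted_frame = adjust_non_voc_csv('relative_labels.csv', 'data/photos',
                                    1280, 720)
print(adjusted_frame[['image_path', 'x_min', 'y_min', 'x_max',
                      'y_max']].head())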
def detect_video(self,
                 video,
                 trained_weights,
                 codec='mp4v',
                 display=False,
                 output_dir=None):
    """
    Perform detection on a video, stream(optional) and save results.

    Args:
        video: Path to video file.
        trained_weights: .tf or .weights file.
        codec: str ex: mp4v
        display: If True, detections will be displayed during
            the detection operation.
        output_dir: Path to output dir, defaults to output/detections

    Returns:
        None
    """
    self.create_models(reverse_v4=trained_weights.endswith('.tf'))
    self.load_weights(trained_weights)
    vid = cv2.VideoCapture(video)
    length = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))
    width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(vid.get(cv2.CAP_PROP_FPS))
    current = 1
    codec = cv2.VideoWriter_fourcc(*codec)
    out = (get_abs_path(output_dir, 'predicted_vid.mp4', create_parents=True)
           if output_dir else get_abs_path(
               'output', 'detections', 'predicted_vid.mp4',
               create_parents=True))
    writer = cv2.VideoWriter(out, codec, fps, (width, height))
    while vid.isOpened():
        read_successful, frame = vid.read()
        if not read_successful:
            break
        detections, adjusted = self.detect_image(frame, f'frame_{current}')
        self.draw_on_image(adjusted, detections)
        writer.write(adjusted)
        completed = f'{round((current / length) * 100, 2)}% completed'
        print(
            f'\rframe {current}/{length}\tdetections: '
            f'{len(detections)}\tcompleted: {completed}',
            end='',
        )
        if display:
            cv2.destroyAllWindows()
            cv2.imshow(f'frame {current}', adjusted)
        current += 1
        if cv2.waitKey(1) == ord('q'):
            LOGGER.info(f'Video detection aborted {current}/{length} '
                        f'frames completed')
            break
    vid.release()
    writer.release()
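# Usage sketch (paths are hypothetical); the annotated video is written to
# output/detections/predicted_vid.mp4 unless output_dir is given:
detector.detect_video(
    video='samples/demo.mp4',  # hypothetical input video
    trained_weights='models/trained_model.tf',  # hypothetical checkpoint
    codec='mp4v',
    display=False,
)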
def save_tfr(data, output_folder, dataset_name, test_size, trainer=None):
    """
    Transform and save dataset into TFRecord format.

    Args:
        data: pandas DataFrame with adjusted labels.
        output_folder: Path to folder where TFRecord(s) will be saved.
        dataset_name: str name of the dataset.
        test_size: relative test subset size.
        trainer: core.Trainer object.

    Returns:
        None
    """
    assert (
        0 < test_size < 1
    ), f'test_size must be 0 < test_size < 1 and {test_size} is given'
    data['object_name'] = data['object_name'].apply(
        lambda x: x.encode('utf-8'))
    data['object_id'] = data['object_id'].astype(int)
    int_columns = data.dtypes[data.dtypes == 'int64'].index
    data[int_columns] = data[int_columns].apply(abs)
    data.to_csv(
        get_abs_path('data', 'tfrecords', 'full_data.csv',
                     create_parents=True),
        index=False,
    )
    groups = np.array(data.groupby('image_path'))
    np.random.shuffle(groups)
    separation_index = int((1 - test_size) * len(groups))
    training_set = groups[:separation_index]
    test_set = groups[separation_index:]
    training_frame = pd.concat([item[1] for item in training_set])
    test_frame = pd.concat([item[1] for item in test_set])
    training_frame.to_csv(
        get_abs_path('data', 'tfrecords', 'training_data.csv',
                     create_parents=True),
        index=False,
    )
    test_frame.to_csv(
        get_abs_path('data', 'tfrecords', 'test_data.csv',
                     create_parents=True),
        index=False,
    )
    training_path = get_abs_path(output_folder,
                                 f'{dataset_name}_train.tfrecord')
    test_path = get_abs_path(output_folder, f'{dataset_name}_test.tfrecord')
    write_tf_record(training_path, training_set, data, trainer)
    LOGGER.info(f'Saved training TFRecord: {training_path}')
    write_tf_record(test_path, test_set, data, trainer)
    LOGGER.info(f'Saved validation TFRecord: {test_path}')
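# Usage sketch, assuming `adjusted_frame` came from adjust_non_voc_csv above
# (the dataset name is hypothetical). Rows are grouped by image_path before
# the split, so one image never straddles train and test:
save_tfr(
    data=adjusted_frame,
    output_folder=get_abs_path('data', 'tfrecords', create_parents=True),
    dataset_name='my_dataset',  # hypothetical name
    test_size=0.1,  # 90/10 split over grouped images
)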
def __init__(
    self,
    labels_file,
    augmentation_map,
    workers=32,
    converted_coordinates_file=None,
    image_folder=None,
):
    """
    Initialize augmentation session.

    Args:
        labels_file: csv file containing relative image labels.
        augmentation_map: A structured dictionary containing categorized
            augmentation sequences.
        workers: Parallel threads.
        converted_coordinates_file: csv file containing labels converted
            from relative values to coordinates.
        image_folder: Folder containing images other than data/photos/
    """
    assert all([ia, iaa, iap])
    self.labels_file = get_abs_path(labels_file, verify=True)
    self.mapping = pd.read_csv(self.labels_file)
    if image_folder:
        self.image_folder = get_abs_path(image_folder, verify=True)
    else:
        self.image_folder = get_abs_path('data', 'photos',
                                         create_parents=True)
    self.image_paths = get_image_files(self.image_folder)
    self.image_paths_copy = self.image_paths.copy()
    if not self.image_paths:
        LOGGER.error(
            f'Augmentation aborted: no photos found in {self.image_folder}')
        raise ValueError('No photos given')
    self.image_width, self.image_height = imagesize.get(self.image_paths[0])
    self.converted_coordinates = (pd.read_csv(converted_coordinates_file)
                                  if converted_coordinates_file else
                                  self.relative_to_coordinates())
    self.converted_groups = self.converted_coordinates.groupby('image')
    self.augmentation_data = []
    self.augmentation_sequences = []
    self.augmentation_map = augmentation_map
    self.workers = workers
    self.augmented_images = 0
    self.total_images = len(self.image_paths)
    self.session_id = np.random.randint(10**6, 10**7)
def create_new_dataset(self, new_dataset_conf):
    """
    Create a new TFRecord dataset.

    Args:
        new_dataset_conf: A dictionary containing the following keys:
            - dataset_name(required) str representing a name for the dataset
            - test_size(required) ex: 0.1
            - augmentation(optional) True or False
            - sequences(required if augmentation is True)
            - aug_workers(optional if augmentation is True) defaults to 32.
            - aug_batch_size(optional if augmentation is True) defaults
              to 64.

            And one of the following is required:
            - relative_labels: Path to csv file with the following columns:
              ['image', 'object_name', 'object_index', 'bx', 'by',
               'bw', 'bh']
            - coordinate_labels: Path to csv file with the following columns:
              ['image_path', 'object_name', 'img_width', 'img_height',
               'x_min', 'y_min', 'x_max', 'y_max', 'relative_width',
               'relative_height', 'object_id']
            - xml_labels_folder: Path to folder containing xml labels.
    """
    LOGGER.info('Generating new dataset ...')
    test_size = new_dataset_conf.get('test_size')
    labels_frame = self.generate_new_frame(new_dataset_conf)
    save_tfr(
        labels_frame,
        get_abs_path('data', 'tfrecords', create_parents=True),
        new_dataset_conf['dataset_name'],
        test_size,
        self,
    )
def adjust_frame(frame, cache_file=None):
    """
    Add relative width, relative height and object ids to annotation
    pandas DataFrame.

    Args:
        frame: pandas DataFrame containing coordinates instead of
            relative labels.
        cache_file: csv file name containing current session labels.

    Returns:
        Frame with the new columns.
    """
    object_id = 1
    for item in frame.columns[2:]:
        frame[item] = frame[item].astype(float).astype(int)
    frame['relative_width'] = (frame['x_max'] -
                               frame['x_min']) / frame['img_width']
    frame['relative_height'] = (frame['y_max'] -
                                frame['y_min']) / frame['img_height']
    for object_name in list(frame['object_name'].drop_duplicates()):
        frame.loc[frame['object_name'] == object_name,
                  'object_id'] = object_id
        object_id += 1
    if cache_file:
        frame.to_csv(get_abs_path('output', 'data', cache_file,
                                  create_parents=True),
                     index=False)
    LOGGER.info(f'Parsed labels:\n{frame["object_name"].value_counts()}')
    return frame
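# A worked example of the transformation (values are hypothetical):
import pandas as pd

frame = pd.DataFrame(
    [['img_001.png', 'car', 1280, 720, 100, 200, 400, 380]],
    columns=['image_path', 'object_name', 'img_width', 'img_height',
             'x_min', 'y_min', 'x_max', 'y_max'],
)
adjusted = adjust_frame(frame)
# relative_width  = (400 - 100) / 1280 = 0.234375
# relative_height = (380 - 200) / 720  = 0.25
# object_id       = 1 (first unique object_name)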
def detect(parser):
    """
    Detect, draw boxes over an image / a folder of images / a video and save.

    Args:
        parser: argparse.ArgumentParser

    Returns:
        None
    """
    cli_args = add_all_args(parser, DETECTION)
    detector = Detector(
        input_shape=cli_args.input_shape,
        model_configuration=cli_args.model_cfg,
        classes_file=cli_args.classes,
        max_boxes=cli_args.max_boxes,
        iou_threshold=cli_args.iou_threshold,
        score_threshold=cli_args.score_threshold,
    )
    check_args = [
        item for item in [cli_args.image, cli_args.image_dir, cli_args.video]
        if item
    ]
    assert (len(check_args) == 1
            ), 'Expected exactly one of --image, --image-dir or --video'
    target_photos = []
    if cli_args.image:
        target_photos.append(get_abs_path(cli_args.image))
    if cli_args.image_dir:
        target_photos.extend(
            get_abs_path(cli_args.image_dir, image)
            for image in get_image_files(cli_args.image_dir))
    if cli_args.image or cli_args.image_dir:
        detector.predict_photos(
            photos=target_photos,
            trained_weights=cli_args.weights,
            batch_size=cli_args.process_batch_size,
            workers=cli_args.workers,
            output_dir=cli_args.output_dir,
        )
    if cli_args.video:
        detector.detect_video(
            video=get_abs_path(cli_args.video, verify=True),
            trained_weights=get_abs_path(cli_args.weights, verify=True),
            codec=cli_args.codec,
            display=cli_args.display_vid,
            output_dir=cli_args.output_dir,
        )
def set_voc_tags(
    tree='annotation',
    folder='folder',
    filename='filename',
    path='path',
    size='size',
    width='width',
    height='height',
    depth='depth',
    obj='object',
    obj_name='name',
    box='bndbox',
    x0='xmin',
    y0='ymin',
    x1='xmax',
    y1='ymax',
    conf_file='voc_conf.json',
    indent=4,
    sort_keys=False,
):
    """
    Create/modify json voc annotation tags.

    Args:
        tree: xml tree tag.
        folder: Image folder tag.
        filename: Image file tag.
        path: Path to image tag.
        size: Image size tag.
        width: Image width tag.
        height: Image height tag.
        depth: Image depth tag.
        obj: Object tag.
        obj_name: Object name tag.
        box: Bounding box tag.
        x0: Start x coordinate tag.
        y0: Start y coordinate tag.
        x1: End x coordinate tag.
        y1: End y coordinate tag.
        conf_file: Configuration file name.
        indent: json output indent.
        sort_keys: Sort json output keys.

    Returns:
        None
    """
    # Remove any previous configuration file before rewriting.
    if os.path.exists(conf_file := get_abs_path(conf_file)):
        os.remove(conf_file)
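# The body above only clears a stale file. A possible continuation (an
# assumption, not the author's code) would write the tag layout that
# parse_voc_file below reads back; key names other than the ones
# parse_voc_file looks up ('path', 'size_tag', 'width', 'height',
# 'object_tag', 'object_name', 'object_box_tag', 'x0'..'y1') are guesses:
import json


def write_voc_tags(conf_file, tree, path, size, width, height, obj, obj_name,
                   box, x0, y0, x1, y1, indent=4, sort_keys=False):
    voc_tags = {
        'tree': {'tree_tag': tree, 'path': path},  # 'tree_tag' is a guess
        'size': {'size_tag': size, 'width': width, 'height': height},
        'object': {
            'object_tag': obj,
            'object_name': obj_name,
            'object_box': {'object_box_tag': box,
                           'x0': x0, 'y0': y0, 'x1': x1, 'y1': y1},
        },
    }
    with open(conf_file, 'w') as out_file:
        json.dump(voc_tags, out_file, indent=indent, sort_keys=sort_keys)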
def clear_outputs():
    """
    Clear output folder.

    Returns:
        None
    """
    for folder_name in os.listdir(get_abs_path('output', verify=True)):
        if not folder_name.startswith('.'):
            full_path = get_abs_path('output', folder_name)
            for file_name in os.listdir(full_path):
                full_file_path = get_abs_path(full_path, file_name)
                if os.path.isdir(full_file_path):
                    shutil.rmtree(full_file_path)
                else:
                    os.remove(full_file_path)
                LOGGER.info(f'Deleted old output: {full_file_path}')
def parse_voc_folder(folder_path, voc_conf):
    """
    Parse a folder containing voc xml annotation files.

    Args:
        folder_path: Folder containing voc xml annotation files.
        voc_conf: Path to voc json configuration file.

    Returns:
        pandas DataFrame with the annotations.
    """
    folder_path = get_abs_path(folder_path, verify=True)
    cache_path = get_abs_path('output', 'data', 'parsed_from_xml.csv')
    if os.path.exists(cache_path):
        frame = pd.read_csv(cache_path)
        LOGGER.info(f'Labels retrieved from cache:'
                    f'\n{frame["object_name"].value_counts()}')
        return frame
    image_data = []
    frame_columns = [
        'image_path',
        'object_name',
        'img_width',
        'img_height',
        'x_min',
        'y_min',
        'x_max',
        'y_max',
    ]
    xml_files = [
        get_abs_path(folder_path, file_name)
        for file_name in os.listdir(folder_path)
        if file_name.endswith('.xml')
    ]
    for annotation_path in xml_files:
        image_labels = parse_voc_file(annotation_path, voc_conf)
        image_data.extend(image_labels)
    frame = pd.DataFrame(image_data, columns=frame_columns)
    classes = frame['object_name'].drop_duplicates()
    LOGGER.info(f'Read {len(xml_files)} xml files')
    LOGGER.info(f'Received {len(frame)} labels containing '
                f'{len(classes)} classes')
    if frame.empty:
        raise ValueError(f'No labels were found in {folder_path}')
    frame = adjust_frame(frame, 'parsed_from_xml.csv')
    return frame
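# Usage sketch (paths are hypothetical); results are cached to
# output/data/parsed_from_xml.csv on first run:
annotations = parse_voc_folder('data/xml_labels',
                               get_abs_path('voc_conf.json', verify=True))
print(annotations['object_name'].value_counts())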
def predict_photos(self,
                   photos,
                   trained_weights,
                   batch_size=32,
                   workers=16,
                   output_dir=None):
    """
    Predict a list of image paths and save results to output folder.

    Args:
        photos: A list of image paths.
        trained_weights: .weights or .tf file.
        batch_size: Prediction batch size.
        workers: Parallel predictions.
        output_dir: Path to output dir, defaults to output/detections

    Returns:
        None
    """
    self.create_models(reverse_v4=trained_weights.endswith('.tf'))
    self.load_weights(get_abs_path(trained_weights, verify=True))
    to_predict = photos.copy()
    saved_paths = set()
    with ThreadPoolExecutor(max_workers=workers) as executor:
        predicted = 1
        total_photos = len(photos)
        while to_predict:
            current_batch = [
                to_predict.pop()
                for _ in range(min(batch_size, len(to_predict)))
            ]
            future_predictions = {
                executor.submit(self.predict_on_image, image, output_dir):
                image
                for image in current_batch
            }
            for future_prediction in as_completed(future_predictions):
                saved_path = future_prediction.result()
                saved_paths.add(saved_path)
                completed = f'{predicted}/{total_photos}'
                current_image = future_predictions[future_prediction]
                percent = round((predicted / total_photos) * 100, 2)
                print(
                    f'\rpredicting {os.path.basename(current_image)} '
                    f'{completed}\t{percent}% completed',
                    end='',
                )
                predicted += 1
    print()
    for saved_path in saved_paths:
        LOGGER.info(f'Saved prediction: {saved_path}')
def create_callbacks(checkpoint_path):
    """
    Create a list of tf.keras.callbacks.

    Args:
        checkpoint_path: Full path to checkpoint.

    Returns:
        callbacks.
    """
    return [
        ReduceLROnPlateau(verbose=1, patience=4),
        ModelCheckpoint(
            get_abs_path(checkpoint_path),
            verbose=1,
            save_weights_only=True,
        ),
        TensorBoard(
            log_dir=get_abs_path('data', 'tfrecords', create_parents=True)),
        EarlyStopping(monitor='val_loss', patience=6, verbose=1),
    ]
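# Usage sketch mirroring Trainer.train below (`training_model`,
# `training_dataset` and `valid_dataset` are placeholders for a compiled
# tf.keras model and tf.data datasets):
callbacks = create_callbacks(get_abs_path('models', 'trained_model.tf'))
history = training_model.fit(
    training_dataset,
    epochs=100,
    callbacks=callbacks,
    validation_data=valid_dataset,
)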
def on_epoch_end(self, epoch, logs=None):
    """
    Start evaluation in valid epochs.

    Args:
        epoch: int, epoch number.
        logs: dict, TensorBoard log.

    Returns:
        None
    """
    if (epoch + 1) % self.n_epochs != 0:
        return
    self.evaluate(*self.evaluation_args)
    evaluation_dir = get_abs_path('output', 'evaluation',
                                  f'epoch-{epoch}-evaluation', create=True)
    current_predictions = [
        get_abs_path('output', 'data', item)
        for item in os.listdir(get_abs_path('output', 'data', verify=True))
    ]
    current_figures = [
        get_abs_path('output', 'plots', item)
        for item in os.listdir(get_abs_path('output', 'plots'))
    ]
    current_files = current_predictions + current_figures
    for file_path in current_files:
        if os.path.isfile(file_path):
            file_name = os.path.basename(file_path)
            new_path = get_abs_path(evaluation_dir, file_name)
            shutil.move(file_path, new_path)
def parse_voc_file(file_path, voc_conf):
    """
    Parse voc annotation from xml file.

    Args:
        file_path: Path to xml file.
        voc_conf: voc configuration file.

    Returns:
        A list of image annotations.
    """
    file_path = get_abs_path(file_path, verify=True)
    voc_conf = get_abs_path(voc_conf, verify=True)
    image_data = []
    with open(voc_conf) as json_data:
        tags = json.load(json_data)
    tree = ElementTree.parse(file_path)
    image_path = get_tree_item(tree, tags['tree']['path'], file_path).text
    size_item = get_tree_item(tree, tags['size']['size_tag'], file_path)
    image_width = get_tree_item(size_item, tags['size']['width'],
                                file_path).text
    image_height = get_tree_item(size_item, tags['size']['height'],
                                 file_path).text
    for item in get_tree_item(tree, tags['object']['object_tag'], file_path,
                              True):
        name = get_tree_item(item, tags['object']['object_name'],
                             file_path).text
        box_item = get_tree_item(
            item, tags['object']['object_box']['object_box_tag'], file_path)
        x0 = get_tree_item(box_item, tags['object']['object_box']['x0'],
                           file_path).text
        y0 = get_tree_item(box_item, tags['object']['object_box']['y0'],
                           file_path).text
        x1 = get_tree_item(box_item, tags['object']['object_box']['x1'],
                           file_path).text
        y1 = get_tree_item(box_item, tags['object']['object_box']['y1'],
                           file_path).text
        image_data.append(
            [image_path, name, image_width, image_height, x0, y0, x1, y1])
    return image_data
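# For reference, a minimal VOC annotation matching the default tag names in
# set_voc_tags (values and paths are hypothetical):
#
#   <annotation>
#     <path>/data/photos/img_001.png</path>
#     <size><width>1280</width><height>720</height><depth>3</depth></size>
#     <object>
#       <name>car</name>
#       <bndbox>
#         <xmin>100</xmin><ymin>200</ymin><xmax>400</xmax><ymax>380</ymax>
#       </bndbox>
#     </object>
#   </annotation>
labels = parse_voc_file('img_001.xml', 'voc_conf.json')  # hypothetical paths
# -> [['/data/photos/img_001.png', 'car', '1280', '720',
#      '100', '200', '400', '380']]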
def visualized(*args, **kwargs):
    result = to_visualize(*args, **kwargs)
    if to_visualize.__name__ in ['parse_voc_folder', 'adjust_non_voc_csv']:
        visualize_box_relative_sizes(result)
        plt.show()
    if to_visualize.__name__ == 'k_means':
        all_args = list(kwargs.values()) + list(args)
        if not any([isinstance(item, pd.DataFrame) for item in all_args]):
            return result
        visualize_k_means_output(*result)
        plt.show()
        visualize_boxes(result[0],
                        get_abs_path('samples', 'sample_image.png'))
        plt.show()
    return result
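# `visualized` closes over `to_visualize`, so it belongs inside a decorator.
# A minimal sketch of that enclosing scope (the decorator name is an
# assumption; the inner body is the `visualized` function above):
import functools


def visualization_wrapper(to_visualize):
    @functools.wraps(to_visualize)
    def visualized(*args, **kwargs):
        # ... body as defined above ...
        return to_visualize(*args, **kwargs)

    return visualized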
def load_image(image_path, new_size=None):
    """
    Load image.

    Args:
        image_path: Path to image to load.
        new_size: New image dimensions (tuple).

    Returns:
        numpy array(image), image_path
    """
    image_path = get_abs_path(image_path, verify=True)
    image = cv2.imread(image_path)
    if image is None:
        LOGGER.warning(f'Failed to read image: {image_path}')
        return
    if new_size:
        return cv2.resize(image, new_size), image_path
    return image, image_path
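# Usage sketch (path hypothetical); callers must handle the None return for
# unreadable files, and cv2.resize expects (width, height):
result = load_image('samples/sample_image.png', new_size=(416, 416))
if result is not None:
    image, image_path = result
    print(image.shape)  # (416, 416, 3)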
def augment_photos_folder(self, batch_size=64, new_size=None):
    """
    Augment photos in data/photos/

    Args:
        batch_size: Size of each augmentation batch.
        new_size: tuple, new image size.

    Returns:
        pandas DataFrame with the old and augmented labels, adjusted
        to coordinates.
    """
    LOGGER.info(f'Started augmentation with {self.workers} workers')
    LOGGER.info(f'Total images to augment: {self.total_images}')
    LOGGER.info(f'Session assigned id: {self.session_id}')
    with ThreadPoolExecutor(max_workers=self.workers) as executor:
        while self.image_paths_copy:
            current_batch, current_paths = self.load_batch(
                new_size, batch_size)
            future_augmentations = {
                executor.submit(self.augment_image, image, path): path
                for image, path in zip(current_batch, current_paths)
            }
            for future_augmented in as_completed(future_augmentations):
                future_augmented.result()
    LOGGER.info('Augmentation completed')
    augmentation_frame = pd.DataFrame(self.augmentation_data,
                                      columns=self.mapping.columns)
    saving_path = get_abs_path('output', 'data',
                               'augmented_data_plus_original.csv',
                               create_parents=True)
    combined = pd.concat([self.mapping, augmentation_frame])
    for item in ['bx', 'by', 'bw', 'bh']:
        combined = combined.drop(combined[combined[item] > 1].index)
    combined.to_csv(saving_path, index=False)
    LOGGER.info(f'Saved old + augmented labels to {saving_path}')
    adjusted_combined = adjust_non_voc_csv(saving_path, self.image_folder,
                                           self.image_width,
                                           self.image_height)
    adjusted_saving_path = saving_path.replace('augmented', 'adjusted_aug')
    adjusted_combined.to_csv(adjusted_saving_path, index=False)
    LOGGER.info(
        f'Saved old + augmented (adjusted) labels to {adjusted_saving_path}')
    return adjusted_combined
def save_fig(title, save_figures=True):
    """
    Save generated figures to output folder.

    Args:
        title: Figure title, also used as the saved file name.
        save_figures: If True, figure will be saved.

    Returns:
        None
    """
    if save_figures:
        saving_path = get_abs_path('output', 'plots', f'{title}.png',
                                   create_parents=True)
        if os.path.exists(saving_path):
            return
        plt.savefig(saving_path)
        LOGGER.info(f'Saved figure {saving_path}')
        plt.close()
def visualize_k_means_output(centroids, frame, save_result=True):
    """
    Visualize centroids and anchor box dimensions calculated.

    Args:
        centroids: 2D array of shape(k, 2) output of k-means.
        frame: pandas DataFrame with the annotation data.
        save_result: If True, figure will be saved.

    Returns:
        None
    """
    title = (
        f'{centroids.shape[0]} Centroids representing relative anchor sizes.')
    if os.path.exists(get_abs_path('output', 'plots',
                                   f'{title}.png')) or frame is None:
        return
    fig, ax = plt.subplots()
    visualize_box_relative_sizes(frame)
    plt.title(title)
    ax.scatter(centroids[:, 0], centroids[:, 1], marker='*', s=200, c='black')
    save_fig(title, save_result)
def visualize_box_relative_sizes(frame, save_result=True):
    """
    Scatter plot annotation box relative sizes.

    Args:
        frame: pandas DataFrame with the annotation data.
        save_result: If True, figure will be saved.

    Returns:
        None
    """
    if frame is None:
        return
    title = f'Relative width and height for {frame.shape[0]} boxes.'
    if os.path.exists(get_abs_path('output', 'plots', f'{title}.png')):
        return
    sns.scatterplot(
        x=frame['relative_width'],
        y=frame['relative_height'],
        hue=frame['object_name'],
        palette='gist_rainbow',
    )
    plt.title(title)
    save_fig(title, save_result)
def augment_image(self, image, image_path):
    """
    Perform augmentation and save image.

    Args:
        image: image to augment.
        image_path: Path to image.

    Returns:
        None
    """
    current_sequence = 1
    for augmentation_sequence in self.augmentation_sequences:
        new_image_name = (f'aug-{self.session_id}-sequence-{current_sequence}'
                          f'-{os.path.basename(image_path)}')
        new_image_path = get_abs_path(self.image_folder, new_image_name)
        bbs, frame_before = self.get_bounding_boxes_over_image(image_path)
        augmented_image, augmented_boxes = augmentation_sequence(
            image=image, bounding_boxes=bbs)
        self.update_data(
            augmented_boxes,
            frame_before,
            augmented_image,
            new_image_name,
            new_image_path,
        )
        current_sequence += 1
        self.augmented_images += 1
    current = os.path.basename(image_path)
    completed = (f'{self.augmented_images}/'
                 f'{self.total_images * len(self.augmentation_sequences)}')
    percent = (self.augmented_images /
               (self.total_images * len(self.augmentation_sequences)) * 100)
    print(
        f'\raugmenting {current}\t{completed}\t{percent}% completed',
        end='',
    )
class Trainer(BaseModel):
    """
    Create a training instance.
    """

    def __init__(
        self,
        input_shape,
        model_configuration,
        classes_file,
        train_tf_record=None,
        valid_tf_record=None,
        anchors=None,
        masks=None,
        max_boxes=100,
        iou_threshold=0.5,
        score_threshold=0.5,
        image_folder=None,
    ):
        """
        Initialize trainer.

        Args:
            input_shape: tuple, (n, n, c)
            model_configuration: Path to yolo DarkNet configuration
                .cfg file.
            classes_file: Path to file containing dataset classes.
            train_tf_record: Path to training tfrecord.
            valid_tf_record: Path to validation tfrecord.
            anchors: numpy array of (w, h) pairs.
            masks: numpy array of masks.
            max_boxes: Maximum boxes of the tfrecords provided (if any)
                or maximum boxes setting.
            iou_threshold: float, values less than the threshold are
                ignored.
            score_threshold: float, values less than the threshold are
                ignored.
            image_folder: Folder that contains images, defaults to
                data/photos.
        """
        if image_folder:
            self.image_folder = get_abs_path(image_folder, verify=True)
        else:
            self.image_folder = get_abs_path('data', 'photos', verify=True)
        assert (len(images := get_image_files(self.image_folder)) >
                1), f'Empty image folder: {self.image_folder}'
        self.image_width, self.image_height = imagesize.get(images[0])
        self.classes_file = get_abs_path(classes_file, verify=True)
        self.class_names = [item.strip() for item in open(self.classes_file)]
        super().__init__(
            input_shape,
            model_configuration,
            len(self.class_names),
            anchors,
            masks,
            max_boxes,
            iou_threshold,
            score_threshold,
        )
        self.train_tf_record = train_tf_record
        self.valid_tf_record = valid_tf_record
        if train_tf_record:
            self.train_tf_record = get_abs_path(train_tf_record, verify=True)
        if valid_tf_record:
            self.valid_tf_record = get_abs_path(valid_tf_record, verify=True)

    def get_adjusted_labels(self, configuration):
        """
        Adjust labels according to given configuration.

        Args:
            configuration: A dictionary containing any one of the following
                keys:
                - relative_labels
                - xml_labels_folder
                - voc_conf (required if xml_labels_folder)
                - coordinate_labels

        Returns:
            pandas DataFrame with adjusted labels.
        """
        labels_frame = None
        check = 0
        if configuration.get('relative_labels'):
            labels_frame = adjust_non_voc_csv(
                configuration['relative_labels'],
                self.image_folder,
                self.image_width,
                self.image_height,
            )
            check += 1
        if xml_folder := configuration.get('xml_labels_folder'):
            if check:
                raise ValueError('Got more than one configuration')
            voc_conf = configuration.get('voc_conf')
            assert voc_conf, 'Missing VOC configuration json file.'
            labels_frame = parse_voc_folder(
                xml_folder,
                get_abs_path(voc_conf, verify=True),
            )
            labels_frame.to_csv(
                get_abs_path('output', 'data', 'parsed_from_xml.csv',
                             create_parents=True),
                index=False,
            )
            check += 1
        if coordinate_labels := configuration.get('coordinate_labels'):
            if check:
                raise ValueError('Got more than one configuration')
            labels_frame = pd.read_csv(
                get_abs_path(coordinate_labels, verify=True))
            check += 1
        return labels_frame
def train(
    self,
    epochs,
    batch_size,
    learning_rate,
    new_anchors_conf=None,
    new_dataset_conf=None,
    dataset_name=None,
    weights=None,
    evaluate=True,
    merge_evaluation=True,
    evaluation_workers=8,
    shuffle_buffer=512,
    min_overlaps=None,
    display_stats=True,
    plot_stats=True,
    save_figs=True,
    clear_outputs=False,
    n_epoch_eval=None,
):
    """
    Train on the dataset.

    Args:
        epochs: Number of training epochs.
        batch_size: Training batch size.
        learning_rate: non-negative value.
        new_anchors_conf: A dictionary containing anchor generation
            configuration.
        new_dataset_conf: A dictionary containing dataset generation
            configuration.
        dataset_name: Name of the dataset for model checkpoints.
        weights: .tf or .weights file.
        evaluate: If False, the trained model will not be evaluated
            after training.
        merge_evaluation: If False, training and validation maps will
            be calculated separately.
        evaluation_workers: Parallel predictions.
        shuffle_buffer: Buffer size for shuffling datasets.
        min_overlaps: a float value between 0 and 1, or a dictionary
            containing each class in self.class_names mapped to its
            minimum overlap.
        display_stats: If True and evaluate=True, evaluation statistics
            will be displayed.
        plot_stats: If True, precision and recall curves as well as
            comparative bar charts will be plotted.
        save_figs: If True and plot_stats=True, figures will be saved.
        clear_outputs: If True, old outputs will be cleared.
        n_epoch_eval: Conduct evaluation every n epochs.

    Returns:
        history object, pandas DataFrame with statistics, mAP score.
    """
    min_overlaps = min_overlaps or 0.5
    if clear_outputs:
        self.clear_outputs()
    activate_gpu()
    LOGGER.info('Starting training ...')
    if new_anchors_conf:
        LOGGER.info('Generating new anchors ...')
        self.generate_new_anchors(new_anchors_conf)
    self.create_models(reverse_v4=True)
    if weights:
        self.load_weights(weights)
    if new_dataset_conf:
        self.create_new_dataset(new_dataset_conf)
    self.check_tf_records()
    training_dataset = self.initialize_dataset(self.train_tf_record,
                                               batch_size, shuffle_buffer)
    valid_dataset = self.initialize_dataset(self.valid_tf_record, batch_size,
                                            shuffle_buffer)
    optimizer = tf.keras.optimizers.Adam(learning_rate)
    loss = [
        calculate_loss(self.anchors[mask], self.classes, self.iou_threshold)
        for mask in self.masks
    ]
    self.training_model.compile(optimizer=optimizer, loss=loss)
    checkpoint_path = get_abs_path(
        'models', f'{dataset_name or "trained"}_model.tf')
    callbacks = self.create_callbacks(checkpoint_path)
    if n_epoch_eval:
        mid_train_eval = MidTrainingEvaluator(
            self.input_shape,
            self.model_configuration,
            self.classes_file,
            self.train_tf_record,
            self.valid_tf_record,
            self.anchors,
            self.masks,
            self.max_boxes,
            self.iou_threshold,
            self.score_threshold,
            n_epoch_eval,
            merge_evaluation,
            evaluation_workers,
            shuffle_buffer,
            min_overlaps,
            display_stats,
            plot_stats,
            save_figs,
            checkpoint_path,
            self.image_folder,
        )
        callbacks.append(mid_train_eval)
    history = self.training_model.fit(
        training_dataset,
        epochs=epochs,
        callbacks=callbacks,
        validation_data=valid_dataset,
    )
    LOGGER.info('Training complete')
    if evaluate:
        evaluations = self.evaluate(
            checkpoint_path,
            merge_evaluation,
            evaluation_workers,
            shuffle_buffer,
            min_overlaps,
            display_stats,
            plot_stats,
            save_figs,
        )
        return evaluations, history
    return history
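# End-to-end usage sketch with a relative-labels csv (paths, names and
# hyperparameters are illustrative, not the author's defaults):
trainer = Trainer(
    input_shape=(416, 416, 3),
    model_configuration='yolov4.cfg',  # hypothetical .cfg path
    classes_file='classes.txt',  # hypothetical classes file
)
trainer.train(
    epochs=100,
    batch_size=8,
    learning_rate=1e-3,
    new_dataset_conf={
        'dataset_name': 'my_dataset',  # hypothetical
        'test_size': 0.1,
        'relative_labels': 'relative_labels.csv',  # hypothetical
    },
    dataset_name='my_dataset',
    n_epoch_eval=10,
)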
def load_weights(self, weights_file):
    """
    Load DarkNet weights or checkpoint/pre-trained weights.

    Args:
        weights_file: .weights or .tf file path.

    Returns:
        None
    """
    assert (suffix := Path(weights_file).suffix) in [
        '.tf',
        '.weights',
    ], 'Invalid weights file'
    if suffix == '.weights':
        assert (
            self.classes == 80
        ), f'DarkNet model should contain 80 classes, {self.classes} is given.'
    if suffix == '.tf':
        self.training_model.load_weights(get_abs_path(weights_file))
        LOGGER.info(f'Loaded weights: {weights_file} ... success')
        return
    with open(get_abs_path(weights_file, verify=True),
              'rb') as weights_data:
        LOGGER.info('Loading pre-trained weights ...')
        major, minor, revision, seen, _ = np.fromfile(weights_data,
                                                      dtype=np.int32,
                                                      count=5)
        self.model_layers = [
            layer for layer in self.training_model.layers
            if id(layer) not in [id(item) for item in self.output_layers]
        ]
        self.model_layers.sort(
            key=lambda layer: int(layer.name.split('_')[1]))
        self.model_layers.extend(self.output_layers)
        for i, layer in enumerate(self.model_layers):
            current_read = weights_data.tell()
            total_size = os.fstat(weights_data.fileno()).st_size
            if current_read == total_size:
                print()
                break
            print(
                f'\r{round(100 * (current_read / total_size))}'
                f'%\t{current_read}/{total_size}',
                end='',
            )
            if 'conv2d' not in layer.name:
                continue
            next_layer = self.model_layers[i + 1]
            b_norm_layer = (next_layer if 'batch_normalization'
                            in next_layer.name else None)
            filters = layer.filters
            kernel_size = layer.kernel_size[0]
            input_dimension = layer.get_input_shape_at(-1)[-1]
            # For convolutions without batch normalization, the bias
            # precedes the kernel weights in the darknet file.
            convolution_bias = (np.fromfile(
                weights_data, dtype=np.float32, count=filters)
                                if b_norm_layer is None else None)
            # Darknet stores [beta, gamma, mean, variance]; keras expects
            # [gamma, beta, mean, variance], hence the reordering.
            bn_weights = (np.fromfile(
                weights_data, dtype=np.float32, count=4 * filters).reshape(
                    (4, filters))[[1, 0, 2, 3]]
                          if b_norm_layer is not None else None)
            convolution_shape = (
                filters,
                input_dimension,
                kernel_size,
                kernel_size,
            )
            convolution_weights = (np.fromfile(
                weights_data,
                dtype=np.float32,
                count=np.prod(convolution_shape),
            ).reshape(convolution_shape).transpose([2, 3, 1, 0]))
            if b_norm_layer is None:
                try:
                    layer.set_weights(
                        [convolution_weights, convolution_bias])
                except ValueError:
                    pass
            if b_norm_layer is not None:
                layer.set_weights([convolution_weights])
                b_norm_layer.set_weights(bn_weights)
        assert len(weights_data.read()) == 0, 'failed to read all data'
    LOGGER.info(f'\nLoaded weights: {weights_file} ... success')
def __init__(
    self,
    input_shape,
    model_configuration,
    classes=80,
    anchors=None,
    masks=None,
    max_boxes=100,
    iou_threshold=0.5,
    score_threshold=0.5,
):
    """
    Initialize yolo model.

    Args:
        input_shape: tuple(n, n, c)
        model_configuration: Path to DarkNet cfg file containing
            configuration.
        classes: Number of classes (defaults to 80 for COCO objects).
        anchors: numpy array of anchors (x, y) pairs.
        masks: numpy array of masks.
        max_boxes: Maximum boxes in a single image.
        iou_threshold: Minimum overlap that counts as a valid detection.
        score_threshold: Minimum confidence that counts as a valid
            detection.
    """
    assert ('3' in model_configuration
            or '4' in model_configuration), 'Invalid model configuration'
    self.version_anchors = {
        'v3':
        np.array(
            [
                (10, 13),
                (16, 30),
                (33, 23),
                (30, 61),
                (62, 45),
                (59, 119),
                (116, 90),
                (156, 198),
                (373, 326),
            ],
            np.float32,
        ),
        'v4':
        np.array(
            [
                (12, 16),
                (19, 36),
                (40, 28),
                (36, 75),
                (76, 55),
                (72, 146),
                (142, 110),
                (192, 243),
                (459, 401),
            ],
            np.float32,
        ),
    }
    self.version_masks = {
        'v3': np.array([[6, 7, 8], [3, 4, 5], [0, 1, 2]]),
        'v4': np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8]]),
    }
    self.current_layer = 1
    self.input_shape = input_shape
    self.classes = classes
    self.anchors = anchors
    if anchors is None:
        if '3' in model_configuration:
            self.anchors = self.version_anchors['v3']
        if '4' in model_configuration:
            self.anchors = self.version_anchors['v4']
    if self.anchors[0][0] > 1:
        self.anchors = self.anchors / input_shape[0]
    self.masks = masks
    if masks is None:
        if '3' in model_configuration:
            self.masks = self.version_masks['v3']
        if '4' in model_configuration:
            self.masks = self.version_masks['v4']
    self.funcs = (
        ZeroPadding2D,
        BatchNormalization,
        LeakyReLU,
        Conv2D,
        Add,
        Input,
        UpSampling2D,
        Concatenate,
        Lambda,
        Mish,
        MaxPooling2D,
    )
    self.func_names = [
        'zero_padding',
        'batch_normalization',
        'leaky_relu',
        'conv2d',
        'add',
        'input',
        'up_sample',
        'concat',
        'lambda',
        'mish',
        'maxpool2d',
    ]
    self.layer_names = {
        func.__name__: f'layer_CURRENT_LAYER_{name}'
        for func, name in zip(self.funcs, self.func_names)
    }
    self.shortcuts = []
    self.previous_layer = None
    self.training_model = None
    self.inference_model = None
    self.output_indices = []
    self.output_layers = []
    self.max_boxes = max_boxes
    self.iou_threshold = iou_threshold
    self.score_threshold = score_threshold
    self.model_configuration = get_abs_path(model_configuration,
                                            verify=True)
    self.model_layers = []
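# The anchor normalization above divides pixel anchors by the input width so
# they become fractions of the network input. A worked example for the first
# two v4 anchors at a 416 x 416 input (mirrors the arithmetic, nothing more):
import numpy as np

input_shape = (416, 416, 3)
anchors = np.array([(12, 16), (19, 36)], np.float32)
if anchors[0][0] > 1:  # pixel units detected, normalize
    anchors = anchors / input_shape[0]
print(anchors[0])  # [0.02884615 0.03846154], i.e. 12/416, 16/416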
def make_predictions(
    self,
    trained_weights,
    merge=False,
    workers=16,
    shuffle_buffer=512,
    batch_size=64,
):
    """
    Make predictions on both training and validation data sets
    and save results as csv in output folder.

    Args:
        trained_weights: Trained .tf weights or .weights file
            (in case self.classes = 80).
        merge: If True a single file will be saved for training
            and validation sets predictions combined.
        workers: Parallel predictions.
        shuffle_buffer: int, shuffle dataset buffer size.
        batch_size: Prediction batch size.

    Returns:
        1 combined pandas DataFrame for entire dataset predictions
        or 2 pandas DataFrame(s) for training and validation data
        sets respectively.
    """
    self.create_models(reverse_v4=trained_weights.endswith('.tf'))
    self.load_weights(trained_weights)
    features = get_feature_map()
    train_dataset = read_tfr(
        self.train_tf_record,
        self.classes_file,
        features,
        self.max_boxes,
        get_features=True,
    )
    valid_dataset = read_tfr(
        self.valid_tf_record,
        self.classes_file,
        features,
        self.max_boxes,
        get_features=True,
    )
    train_dataset = train_dataset.shuffle(shuffle_buffer)
    valid_dataset = valid_dataset.shuffle(shuffle_buffer)
    train_dataset = iter(train_dataset)
    valid_dataset = iter(valid_dataset)
    train_predictions = self.predict_dataset(train_dataset, workers, 'train',
                                             batch_size)
    valid_predictions = self.predict_dataset(valid_dataset, workers, 'valid',
                                             batch_size)
    if merge:
        predictions = pd.concat([train_predictions, valid_predictions])
        save_path = get_abs_path('output', 'data',
                                 'full_dataset_predictions.csv',
                                 create_parents=True)
        predictions.to_csv(save_path, index=False)
        return predictions
    train_path = get_abs_path('output', 'data',
                              'train_dataset_predictions.csv',
                              create_parents=True)
    valid_path = get_abs_path('output', 'data',
                              'valid_dataset_predictions.csv',
                              create_parents=True)
    train_predictions.to_csv(train_path, index=False)
    valid_predictions.to_csv(valid_path, index=False)
    return train_predictions, valid_predictions
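# Usage sketch, assuming an evaluator-like instance that inherits this
# method and already has train/valid tfrecords set (checkpoint path is
# hypothetical):
train_preds, valid_preds = evaluator.make_predictions(
    trained_weights='models/trained_model.tf',
    merge=False,
    workers=16,
)
# csv copies are also written under output/data/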