Ejemplo n.º 1
0
    def cropped_obj_images_to_gcs(self):
        """Fetch the cropped object images and upload them to GCS as one array.

        The array comes from ``self.get_cropped_obj_images()`` and is saved in
        ``self.bucket_name`` under
        ``<processed_bucket_subfolder><class_name>/<processed_array_save_name>``
        via ``mf.save_np_array_to_gsc``. Progress is reported with
        ``mf.print_timestamp_message``. Nothing is returned.
        """
        # Step 1: obtain the cropped image array
        mf.print_timestamp_message(
            f'Reading, cropping, and resizing {self.class_name} images')
        cropped_images = self.get_cropped_obj_images()
        # First axis length is reported as the number of images
        image_count = cropped_images.shape[0]

        # Step 2: upload the array to the bucket
        destination = f'{self.processed_bucket_subfolder}{self.class_name}/{self.processed_array_save_name}'
        upload_message = f'Writing {image_count} cropped images to GCS bucket/folder {self.bucket_name}/{destination}'
        mf.print_timestamp_message(upload_message)
        mf.save_np_array_to_gsc(
            np_array=cropped_images,
            bucket_name=self.bucket_name,
            file_name=destination,
        )
Ejemplo n.º 2
0
    def resize_and_save_images(self):
        """Download and resize the class's images, then upload the results to GCS.

        Three objects are written to ``self.bucket_name`` below the prefix
        ``<processed_bucket_subfolder><class_name>/``:

        * the resized images as a single numpy array
          (``processed_array_save_name``),
        * the bounding-box dataframe as csv (``processed_bbox_save_name``),
        * the class image-info dataframe as csv
          (``processed_class_save_name``).

        Nothing is returned.
        """
        # --- Class metadata --------------------------------------------------
        mf.print_timestamp_message(
            f'Getting urls, bounding boxes, and image IDs for {self.class_name} images'
        )
        # The image IDs are part of the returned tuple but are not used here.
        urls, bbox_df, _image_ids, class_image_df = self.get_image_class_info()

        # --- Download + resize -----------------------------------------------
        url_count = len(urls)
        mf.print_timestamp_message(
            f'Reading images from {url_count} URLs and resizing to {self.resize_height} X {self.resize_width}'
        )
        resized_images = np.array(
            load_resize_images_from_urls(
                url_list=urls,
                resize_height=self.resize_height,
                resize_width=self.resize_width,
            )
        )

        # --- Upload: image array ---------------------------------------------
        array_target = f'{self.processed_bucket_subfolder}{self.class_name}/{self.processed_array_save_name}'
        mf.print_timestamp_message(
            f'Writing images to GCS bucket/folder {self.bucket_name}/{array_target}'
        )
        mf.save_np_array_to_gsc(
            np_array=resized_images,
            bucket_name=self.bucket_name,
            file_name=array_target,
        )

        # --- Upload: bounding-box csv ----------------------------------------
        bbox_target = f'{self.processed_bucket_subfolder}{self.class_name}/{self.processed_bbox_save_name}'
        mf.print_timestamp_message(
            f'Writing bounding box csv file to GCS bucket/folder {self.bucket_name}/{bbox_target}'
        )
        mf.write_csv_to_gcs(
            dframe=bbox_df,
            bucket_name=self.bucket_name,
            file_name=bbox_target,
        )

        # --- Upload: class image-info csv ------------------------------------
        class_target = f'{self.processed_bucket_subfolder}{self.class_name}/{self.processed_class_save_name}'
        mf.print_timestamp_message(
            f'Writing class image info csv file to GCS bucket/folder {self.bucket_name}/{class_target}'
        )
        mf.write_csv_to_gcs(
            dframe=class_image_df,
            bucket_name=self.bucket_name,
            file_name=class_target,
        )
Ejemplo n.º 3
0
    def get_processed_data(self):
        """Collect training data for every class in ``self.class_list``.

        For each class an ``OpenCVImageClassRetriever`` supplies an image
        array and a bounding-box array. A label list repeating the class name
        once per image (first axis of the image array) is built alongside.

        Returns:
            A 3-tuple ``(images, bboxes, labels)``: the per-class image arrays
            stacked with ``np.vstack``, the per-class bounding-box arrays
            stacked with ``np.vstack``, and the per-class label lists passed
            through ``mf.unnest_list_of_lists``.
        """
        images_per_class = []
        bboxes_per_class = []
        labels_per_class = []

        # Loop over image classes to retrieve data
        for position, class_name in enumerate(self.class_list, start=1):
            mf.print_timestamp_message(
                f"Pulling data for class '{class_name}' ({position} of {len(self.class_list)}) from Google Cloud Storage"
            )
            retriever = OpenCVImageClassRetriever(class_name=class_name)
            class_images, class_bboxes = retriever.get_training_data()

            images_per_class.append(class_images)
            bboxes_per_class.append(class_bboxes)
            # One label per image of this class
            labels_per_class.append([class_name] * class_images.shape[0])

        # Combine the per-class pieces into single outputs
        stacked_images = np.vstack(images_per_class)
        stacked_bboxes = np.vstack(bboxes_per_class)
        flat_labels = mf.unnest_list_of_lists(labels_per_class)
        return stacked_images, stacked_bboxes, flat_labels
Ejemplo n.º 4
0
    def run_grid_search(self):
        """Fit every model in ``self.model_list`` on each fold and score it.

        For each (model, fold) pair the model is compiled, fit on the fold's
        training samples with validation-based checkpointing and early
        stopping, and the checkpoint saved at ``self.model_save_name`` is then
        reloaded and scored on the fold's test samples.

        Returns:
            A ``pd.DataFrame`` with one row per (model, fold) and the columns
            ``model`` (name of the reloaded model), ``model_number`` (index in
            ``self.model_list``), ``fold``, ``categorical_accuracy`` (share of
            test samples whose argmax prediction equals the argmax of ``y``)
            and ``execution_time`` (seconds spent in clear_session, compile
            and fit).

        NOTE(review): ``x`` and ``y`` are not defined in this method; they are
        presumably module-level arrays of inputs and one-hot targets — confirm.
        """
        # Output Lists
        output_categ_acc = []
        output_exec_time = []
        output_folds = []
        output_models = []
        output_model_number = []

        # Train, Test, Validation Folds
        # Indexed by fold number k below: train_k[k] is a collection of fold
        # ids, test_k[k] and valid_k[k] are compared as single fold ids.
        train_k, test_k, valid_k = self.train_test_val_folds()

        for iM, model in enumerate(self.model_list):
            for k in range(self.k_folds):
                # Separate Train and Test in Generators
                # NOTE(review): get_fold_indices() is re-evaluated for every
                # model/fold; if it is deterministic it could be computed once
                # — confirm before hoisting.
                indices = self.get_fold_indices()
                train_i = mf.unnest_list_of_lists(
                    [j for i, j in enumerate(indices) if i in train_k[k]])
                test_i = mf.unnest_list_of_lists(
                    [j for i, j in enumerate(indices) if i == test_k[k]])
                valid_i = mf.unnest_list_of_lists(
                    [j for i, j in enumerate(indices) if i == valid_k[k]])
                train_gen = self.batch_generator(x[train_i],
                                                 y[train_i],
                                                 batch_size=self.batch_size)
                valid_gen = self.batch_generator(x[valid_i],
                                                 y[valid_i],
                                                 batch_size=self.batch_size)

                # Calculate Class Weights
                # The comprehension variable ``x`` is local to the
                # comprehension and does not rebind the outer ``x``.
                class_wt_dict = imm.make_class_weight_dict(
                    [np.argmax(x) for x in y[train_i]], return_dict=True)

                # Define Callbacks
                # The checkpoint path is the same for every model and fold, so
                # each run overwrites the previous run's saved model.
                check_point = keras.callbacks.ModelCheckpoint(
                    self.model_save_name,
                    monitor='val_loss',
                    verbose=1,
                    save_best_only=True,
                    mode='min')
                early_stop = keras.callbacks.EarlyStopping(
                    monitor='val_loss', mode='min', patience=self.patience)

                # Train Model
                # Timing starts before clear_session/compile, so both are
                # included in the reported execution time.
                train_start_time = time.time()
                keras.backend.clear_session()

                # Define Model Compilation
                # NOTE(review): the same model object is compiled and fit for
                # every fold; confirm whether its weights are meant to carry
                # over from one fold to the next.
                model.compile(loss=self.loss,
                              optimizer=self.optimizer,
                              metrics=self.metrics)

                # Step counts use floor division, so a trailing partial batch
                # is not counted (and the count is 0 when there are fewer
                # samples than batch_size).
                model.fit(
                    train_gen,
                    epochs=self.epochs,
                    validation_data=valid_gen,
                    steps_per_epoch=int(len(train_i)) // self.batch_size,
                    validation_steps=int(len(valid_i)) // self.batch_size,
                    callbacks=[check_point, early_stop, self.lr_schedule],
                    class_weight=class_wt_dict,
                    verbose=2)

                train_end_time = time.time()
                exec_time = train_end_time - train_start_time

                # Accuracy on Test Set
                # Scores the checkpointed model reloaded from disk rather than
                # the in-memory ``model`` as it stands after the last epoch.
                saved_model = keras.models.load_model(self.model_save_name)
                pred_values = saved_model.predict(x[test_i])
                output_categ_acc.append(
                    np.mean(
                        np.equal(np.argmax(y[test_i], axis=-1),
                                 np.argmax(pred_values, axis=-1))))
                output_exec_time.append(exec_time)
                output_folds.append(k)
                output_models.append(saved_model.name)
                output_model_number.append(iM)
                mf.print_timestamp_message(
                    f'Completed fold {k+1} of {self.k_folds} for model {iM+1} of {len(self.model_list)}'
                )

                # Delete Variables in Memory
                del train_gen, valid_gen, check_point, early_stop, saved_model, pred_values
                keras.backend.clear_session()

        # Collate Fold Results into DataFrame
        output_df = pd.DataFrame({
            'model': output_models,
            'model_number': output_model_number,
            'fold': output_folds,
            'categorical_accuracy': output_categ_acc,
            'execution_time': output_exec_time
        })
        return output_df
Ejemplo n.º 5
0
    def save_whole_images_and_bbox(self):
        """Download, resize, and pickle whole images with their bounding boxes.

        For each unique image ID of ``self.class_name`` (capped at
        ``self.max_images`` when that is not None) the image is read from its
        ``OriginalURL`` and resized once. One entry is then recorded per
        bounding box, so an image with several boxes appears several times in
        the outputs. Blank images, and images whose resized shape is not
        ``(resize_width, resize_height, 3)``, are left out.

        Three index-aligned pickle files are written to
        ``<save_loc><class_name>/``: ``img_id_list.pkl``, ``coord_list.pkl``
        (``[XMin, XMax, YMin, YMax]`` rows) and ``img_arr.pkl`` (numpy array
        of the resized images).

        Processing is best effort: an image that raises while being read or
        resized is skipped, and the number of skipped images is reported.
        Nothing is returned.
        """
        # Retrieve class metadata
        image_retriever = OpenCVImageClassRetriever(class_name=self.class_name)
        bbox_df = image_retriever.get_bounding_box_df()
        desc_df = image_retriever.get_class_desc_df()

        # Image IDs
        unique_img_ids = list(
            np.unique(bbox_df[self.image_id_col].values.tolist()))
        if self.max_images is not None:
            unique_img_ids = unique_img_ids[:self.max_images]

        # NOTE(review): the target size is passed as (width, height) and the
        # expected shape uses the same order; this only matters when the two
        # differ — confirm against the resize function's convention.
        target_size = (self.resize_width, self.resize_height)
        correct_shape = (self.resize_width, self.resize_height, 3)

        # Read and resize images, recording one entry per bounding box
        img_id_list = []
        img_list = []
        coord_list = []
        n_skipped = 0
        for img_id in tqdm.tqdm(unique_img_ids):
            try:
                # Subset info dataframes for this image ID
                # NOTE(review): the IDs above come from self.image_id_col but
                # the filters use the hard-coded ImageID column — confirm the
                # two always agree.
                bbox_df_i = bbox_df[bbox_df.ImageID == img_id]
                desc_df_i = desc_df[desc_df.ImageID == img_id]

                # Read image
                img_i = read_url_image(desc_df_i['OriginalURL'].values[0])

                bbox_coords = bbox_df_i[['XMin', 'XMax', 'YMin',
                                         'YMax']].values.tolist()

                # The resize and the blank/shape check do not depend on the
                # bounding box, so they are done once per image rather than
                # once per box.
                img_resized = resize(img_i, target_size)
                usable = (not is_blank_img(img_resized)
                          and img_resized.shape == correct_shape)
            except Exception:
                # Best effort: one bad URL or image must not abort the run.
                # ``Exception`` (unlike a bare ``except``) still lets
                # KeyboardInterrupt / SystemExit stop the loop.
                n_skipped += 1
                continue

            if not usable:
                continue
            for bbc in bbox_coords:
                img_list.append(img_resized)
                coord_list.append(bbc)
                img_id_list.append(img_id)

        if n_skipped:
            mf.print_timestamp_message(
                f'Skipped {n_skipped} of {len(unique_img_ids)} images that could not be read or processed'
            )

        # Save items
        class_folder_loc = f'{self.save_loc}{self.class_name}/'
        mf.create_folder_if_not_existing(class_folder_loc)
        mf.print_timestamp_message(
            f'Writing file with image IDs: {class_folder_loc}img_id_list.pkl')
        with open(f'{class_folder_loc}img_id_list.pkl', 'wb') as f:
            pickle.dump(img_id_list, f, protocol=4)
        mf.print_timestamp_message(
            f'Writing file with coordinates: {class_folder_loc}coord_list.pkl')
        with open(f'{class_folder_loc}coord_list.pkl', 'wb') as f:
            pickle.dump(coord_list, f, protocol=4)
        mf.print_timestamp_message(
            f'Writing file with numpy array: {class_folder_loc}img_arr.pkl')
        with open(f'{class_folder_loc}img_arr.pkl', 'wb') as f:
            pickle.dump(np.array(img_list), f, protocol=4)
Ejemplo n.º 6
0
        rect = patches.Rectangle((xmin_p, ymin_p), box_width, box_height, linewidth = linewidth, edgecolor = box_color, facecolor = 'none')
        ax.text(xmin_p, ymin_p + y_offset, labels[i], color = text_color, fontsize = fontsize)
        ax.add_patch(rect)
    plt.imshow(img_arr)
    plt.show()


### Data Processing: Read & Resize Images, Get Bounding Box Coordinates
###############################################################################
# Per-class results, keyed by class name:
#   image_id_list_dict[class] -> list of image IDs retrieved for the class
#   coord_list_dict[class]    -> mapping returned by the retriever, keyed by image ID
image_id_list_dict = {}
coord_list_dict = {}
od_classes = cdp.config_obj_detection_classes


for i, odc in enumerate(od_classes):
    mf.print_timestamp_message(f'Starting {odc} class {(i+1)} of {len(od_classes)}')
    image_retriever = DetectionImageRetriever(class_name = odc, max_images = 5000, resize_height = 416, resize_width = 416)
    img_coord_dict, img_array_dict = image_retriever.get_whole_images_and_bbox()
    img_id_list = list(img_coord_dict.keys())
    image_id_list_dict[odc] = img_id_list

    # Write every retrieved image to disk as <intmd_save_loc><image id>.jpeg.
    # All classes share one folder, so an image ID that occurs in more than
    # one class is overwritten by the later class.
    # Iterating the list directly (rather than an enumerate object) lets tqdm
    # see the length, and avoids reusing the outer loop's index name.
    for img_id in tqdm.tqdm(img_id_list):
        img_save_name = f'{intmd_save_loc}{img_id}.jpeg'
        # Scale to 8-bit before saving
        # (assumes the arrays hold floats in [0, 1] — TODO confirm)
        im = Image.fromarray((img_array_dict.get(img_id) * 255).astype(np.uint8))
        im.save(img_save_name)

    coord_list_dict[odc] = img_coord_dict
    # Drop the per-class names before the next class is loaded; the image
    # arrays are only referenced through img_array_dict here.
    del img_id_list, img_coord_dict, img_array_dict