Exemple #1
0
def _listFiles(directory, recurse):
    """Return paths of the files in directory (all subdirectories if recurse)."""
    if recurse:
        return [
            os.path.join(rootdir, filename)
            for rootdir, _, filenames in os.walk(directory)
            for filename in filenames
        ]
    return [
        os.path.join(directory, filename)
        for filename in os.listdir(directory)
        if os.path.isfile(os.path.join(directory, filename))
    ]


def _extractFeatures(filepaths, descriptor, cv_color_const, channels):
    """Return one descriptor feature vector per readable image in filepaths."""
    feature_vectors = []
    for filepath in filepaths:
        image = cv2.imread(filepath)
        if image is None:
            # cv2.imread returns None (rather than raising) for files it
            # cannot decode, e.g. stray non-image files in a sample directory.
            warnings.warn("Skipping unreadable image file " + filepath,
                          RuntimeWarning)
            continue

        if cv_color_const > -1:
            image = cv2.cvtColor(image, cv_color_const)

        if len(image.shape) > 2:
            image = image[:, :, channels]

        feature_vectors.append(descriptor.getFeatureVector(image))
    return feature_vectors


def _shuffleRows(samples):
    """Return a copy of the 2-D array samples with its rows randomly permuted."""
    # random.shuffle must not be applied to a 2-D ndarray directly: its
    # element swap operates on row views and ends up duplicating rows.
    # Shuffling an index list keeps the `random` module as the RNG source.
    order = list(range(len(samples)))
    random.shuffle(order)
    return samples[order]


def _splitSamples(samples):
    """Split samples 75/20/5 into (train, validation, test) arrays."""
    num_samples = len(samples)
    return np.split(samples,
                    [int(.75 * num_samples),
                     int(.95 * num_samples)])


def processFiles(pos_dir,
                 neg_dir,
                 recurse=False,
                 output_file=False,
                 output_filename=None,
                 color_space="bgr",
                 channels=[0, 1, 2],
                 hog_features=False,
                 hist_features=False,
                 spatial_features=False,
                 hog_lib="cv",
                 size=(64, 64),
                 hog_bins=9,
                 pix_per_cell=(8, 8),
                 cells_per_block=(2, 2),
                 block_stride=None,
                 block_norm="L1",
                 transform_sqrt=True,
                 signed_gradient=False,
                 hist_bins=16,
                 spatial_size=(16, 16)):
    """
    Extract features from positive samples and negative samples.
    Store feature vectors in a dict and optionally save to pickle file.

    @param pos_dir (str): Path to directory containing positive samples.
    @param neg_dir (str): Path to directory containing negative samples.
    @param recurse (bool): Traverse directories recursively (else, top-level only).
    @param output_file (bool): Save processed samples to file.
    @param output_filename (str): Output file filename. If None, a
        timestamped name ending in "_data.pkl" is generated.
    @param color_space (str): Color space conversion (case-insensitive).
        Unrecognized names fall back to BGR (no conversion).
    @param channels (list): Image channel indices to use. Ignored (forced
        to [0]) when color_space is "gray".

    For remaining arguments, refer to Descriptor class:
    @see descriptor.Descriptor#__init__(...)

    @return feature_data (dict): Lists of sample features split into training,
        validation, test sets; scaler object; parameters used to
        construct descriptor and process images.

    @raise RuntimeError: No feature type was selected.
    @raise FileNotFoundError: pos_dir or neg_dir is not a directory.
    @raise ValueError: No readable image was found in pos_dir or neg_dir.

    NOTE: OpenCV HOGDescriptor currently only supports 1-channel and 3-channel
    images, not 2-channel images.

    NOTE: Files that cv2.imread cannot decode are skipped with a
    RuntimeWarning instead of aborting the whole run.
    """

    if not (hog_features or hist_features or spatial_features):
        raise RuntimeError(
            "No features selected (set hog_features=True, " +
            "hist_features=True, and/or spatial_features=True.)")

    pos_dir = os.path.abspath(pos_dir)
    neg_dir = os.path.abspath(neg_dir)

    for directory in (pos_dir, neg_dir):
        if not os.path.isdir(directory):
            raise FileNotFoundError("Directory " + directory +
                                    " does not exist.")

    print("Building file list...")
    pos_files = _listFiles(pos_dir, recurse)
    neg_files = _listFiles(neg_dir, recurse)

    print("{} positive files and {} negative files found.\n".format(
        len(pos_files), len(neg_files)))

    # Get color space information: display name and OpenCV conversion code.
    # A conversion code of -1 means "leave the image in BGR".
    color_space = color_space.lower()
    color_space_table = {
        "gray": ("grayscale", cv2.COLOR_BGR2GRAY),
        "hls": ("HLS", cv2.COLOR_BGR2HLS),
        "hsv": ("HSV", cv2.COLOR_BGR2HSV),
        "lab": ("Lab", cv2.COLOR_BGR2Lab),
        "luv": ("Luv", cv2.COLOR_BGR2Luv),
        "ycrcb": ("YCrCb", cv2.COLOR_BGR2YCrCb),
        "ycc": ("YCrCb", cv2.COLOR_BGR2YCrCb),
        "yuv": ("YUV", cv2.COLOR_BGR2YUV),
    }
    color_space_name, cv_color_const = color_space_table.get(
        color_space, ("BGR", -1))

    # Copy so the returned dict never aliases the caller's list (or the
    # shared default argument). Grayscale images only have channel 0.
    channels = [0] if color_space == "gray" else list(channels)

    # Get names of desired features.
    feature_str = ", ".join(
        feature_name for feature_name, enabled in
        zip(["HOG", "color histogram", "spatial"],
            [hog_features, hist_features, spatial_features])
        if enabled)

    # Get information about channel indices.
    if len(channels) == 2 and hog_features and hog_lib == "cv":
        warnings.warn("OpenCV HOG does not support 2-channel images",
                      RuntimeWarning)

    channel_index_str = ", ".join(str(ch_index) for ch_index in channels)

    print("Converting images to " + color_space_name + " color space and " +
          "extracting " + feature_str + " features from channel(s) " +
          channel_index_str + ".\n")

    start_time = time.time()

    # Get feature descriptor object to call on each sample.
    descriptor = Descriptor(hog_features=hog_features,
                            hist_features=hist_features,
                            spatial_features=spatial_features,
                            hog_lib=hog_lib,
                            size=size,
                            hog_bins=hog_bins,
                            pix_per_cell=pix_per_cell,
                            cells_per_block=cells_per_block,
                            block_stride=block_stride,
                            block_norm=block_norm,
                            transform_sqrt=transform_sqrt,
                            signed_gradient=signed_gradient,
                            hist_bins=hist_bins,
                            spatial_size=spatial_size)

    # Extract feature vectors for positive and negative samples.
    pos_features = _extractFeatures(pos_files, descriptor, cv_color_const,
                                    channels)
    neg_features = _extractFeatures(neg_files, descriptor, cv_color_const,
                                    channels)

    print("Features extracted from {} files in {:.1f} seconds\n".format(
        len(pos_features) + len(neg_features),
        time.time() - start_time))

    if not pos_features:
        raise ValueError("No readable images found in " + pos_dir + ".")
    if not neg_features:
        raise ValueError("No readable images found in " + neg_dir + ".")

    # Store the length of the feature vector produced by the descriptor.
    num_features = len(pos_features[0])

    # Fit the scaler on all samples and scale both classes.
    # NOTE: the scaler is fit before the train/validation/test split, so its
    # statistics include the validation and test samples.
    scaler = StandardScaler()
    scaler.fit(np.concatenate((pos_features, neg_features), axis=0))
    pos_features = scaler.transform(pos_features)
    neg_features = scaler.transform(neg_features)

    # Randomize the feature vectors, then split 75/20/5 into training,
    # validation, and test sets.
    print(
        "Shuffling samples into training, cross-validation, and test sets.\n")
    pos_train, pos_val, pos_test = _splitSamples(_shuffleRows(pos_features))
    neg_train, neg_val, neg_test = _splitSamples(_shuffleRows(neg_features))

    # Store sample data and parameters in dict.
    # The Descriptor object itself is deliberately not stored: it seems to
    # produce errors when unpickling, so it is re-instantiated from these
    # parameters by the Detector object later.
    feature_data = {
        "pos_train": pos_train,
        "neg_train": neg_train,
        "pos_val": pos_val,
        "neg_val": neg_val,
        "pos_test": pos_test,
        "neg_test": neg_test,
        "scaler": scaler,
        "hog_features": hog_features,
        "hist_features": hist_features,
        "spatial_features": spatial_features,
        "color_space": color_space,
        "cv_color_const": cv_color_const,
        "channels": channels,
        "hog_lib": hog_lib,
        "size": size,
        "hog_bins": hog_bins,
        "pix_per_cell": pix_per_cell,
        "cells_per_block": cells_per_block,
        "block_stride": block_stride,
        "block_norm": block_norm,
        "transform_sqrt": transform_sqrt,
        "signed_gradient": signed_gradient,
        "hist_bins": hist_bins,
        "spatial_size": spatial_size,
        "num_features": num_features
    }

    # Pickle to file if desired.
    if output_file:
        if output_filename is None:
            output_filename = (datetime.now().strftime("%Y%m%d%H%M") +
                               "_data.pkl")

        # Context manager guarantees the file is flushed and closed.
        with open(output_filename, "wb") as pickle_file:
            pickle.dump(feature_data, pickle_file)
        print(
            "Sample and parameter data saved to {}\n".format(output_filename))

    return feature_data
class Detector:
    """
    Class for finding objects in a video stream. Loads and utilizes a
    pretrained classifier.

    Typical use: construct, call loadClassifier(...), then detectVideo(...).
    """
    def __init__(self,
                 init_size=(64, 64),
                 x_overlap=0.5,
                 y_step=0.05,
                 x_range=(0, 1),
                 y_range=(0, 1),
                 scale=1.5):
        """For input arguments, @see slidingwindow.#slidingWindow(...)"""

        self.init_size = init_size
        self.x_overlap = x_overlap
        self.y_step = y_step
        self.x_range = x_range
        self.y_range = y_range
        self.scale = scale
        # Window coordinates; populated by detectVideo from the frame size.
        self.windows = None

    def loadClassifier(self, filepath=None, classifier_data=None):
        """
        Load a classifier trained by the functions in train.py. Either a dict
        (classifier_data) or pickled file (filepath) may be supplied. If both
        are given, filepath takes precedence.

        @param filepath (str): Path to a pickled classifier data dict.
        @param classifier_data (dict): Classifier data dict.

        @return self, to allow call chaining.

        @raise FileNotFoundError: filepath is not an existing file.
        @raise ValueError: Neither argument yielded classifier data.
        """

        if filepath is not None:
            filepath = os.path.abspath(filepath)
            if not os.path.isfile(filepath):
                raise FileNotFoundError("File " + filepath +
                                        " does not exist.")
            # NOTE: unpickling can execute arbitrary code; only load
            # classifier files that come from a trusted source.
            with open(filepath, "rb") as pickle_file:
                classifier_data = pickle.load(pickle_file)

        if classifier_data is None:
            raise ValueError("Invalid classifier data supplied.")

        self.classifier = classifier_data["classifier"]
        self.scaler = classifier_data["scaler"]
        self.cv_color_const = classifier_data["cv_color_const"]
        self.channels = classifier_data["channels"]

        # Simply loading the descriptor from the dict with
        #   self.descriptor = classifier_data["descriptor"]
        # produces an error. Thus, we instantiate a new descriptor object
        # using the same parameters on which the classifier was trained.
        self.descriptor = Descriptor(
            hog_features=classifier_data["hog_features"],
            hist_features=classifier_data["hist_features"],
            spatial_features=classifier_data["spatial_features"],
            hog_lib=classifier_data["hog_lib"],
            size=classifier_data["size"],
            hog_bins=classifier_data["hog_bins"],
            pix_per_cell=classifier_data["pix_per_cell"],
            cells_per_block=classifier_data["cells_per_block"],
            block_stride=classifier_data["block_stride"],
            block_norm=classifier_data["block_norm"],
            transform_sqrt=classifier_data["transform_sqrt"],
            signed_gradient=classifier_data["signed_gradient"],
            hist_bins=classifier_data["hist_bins"],
            spatial_size=classifier_data["spatial_size"])

        return self

    def classify(self, image):
        """
        Classify windows of an image as "positive" (containing the desired
        object) or "negative". Return a list of positively classified windows.

        @param image (ndarray): Image to scan; converted with the color
            conversion code and channel selection loaded by loadClassifier.

        @return (list): Entries of self.windows, each unpacked as
            (x_upper, y_upper, x_lower, y_lower), whose prediction equals 1.

        @raise RuntimeError: self.windows has not been set yet.
        """

        if self.windows is None:
            raise RuntimeError(
                "No sliding windows defined; self.windows must be set "
                "(detectVideo does this) before calling classify.")
        if len(self.windows) == 0:
            # Nothing to classify; the scaler would reject an empty batch.
            return []

        if self.cv_color_const > -1:
            image = cv2.cvtColor(image, self.cv_color_const)

        if len(image.shape) > 2:
            image = image[:, :, self.channels]
        else:
            # Give single-channel images an explicit channel axis so the
            # 3-D window slicing below works uniformly.
            image = image[:, :, np.newaxis]

        feature_vectors = [
            self.descriptor.getFeatureVector(image[y_upper:y_lower,
                                                   x_upper:x_lower, :])
            for (x_upper, y_upper, x_lower, y_lower) in self.windows
        ]

        # Scale feature vectors, predict, and return predictions.
        feature_vectors = self.scaler.transform(feature_vectors)
        predictions = self.classifier.predict(feature_vectors)
        return [
            self.windows[ind] for ind in np.argwhere(predictions == 1)[:, 0]
        ]

    def detectVideo(self,
                    video_capture=None,
                    num_frames=9,
                    threshold=120,
                    min_bbox=None,
                    show_video=True,
                    draw_heatmap=True,
                    draw_heatmap_size=0.2,
                    write=False,
                    write_fps=24):
        """
        Find objects in each frame of a video stream by integrating bounding
        boxes over several frames to produce a heatmap of pixels with high
        prediction density, ignoring pixels below a threshold, and grouping
        the remaining pixels into objects. Draw boxes around detected objects.

        The first frame read from the capture is only used to determine the
        frame size; detection starts with the second frame. The capture (and
        the video writer, if any) is released when this method returns or
        raises.

        @param video_capture (VideoCapture): cv2.VideoCapture object.
        @param num_frames (int): Number of frames to sum over.
        @param threshold (int): Threshold for heatmap pixel values.
        @param min_bbox (int, int): Minimum (width, height) of a detection
            bounding box in pixels. Boxes smaller than this will not be drawn.
            Defaults to 2% of image size.
        @param show_video (bool): Display the video.
        @param draw_heatmap (bool): Display the heatmap in an inset in the
            upper left corner of the video.
        @param draw_heatmap_size (float): Size of the heatmap inset as a
            fraction between (0, 1) of the image size.
        @param write (bool): Write the resulting video, with detection
            bounding boxes and/or heatmap, to a video file.
        @param write_fps (num): Frames per second for the output video.

        @raise RuntimeError: The capture is missing, not opened, or yields
            no frame.
        """

        cap = video_capture
        if cap is None or not cap.isOpened():
            raise RuntimeError("Error opening VideoCapture.")

        writer = None
        try:
            (grabbed, frame) = cap.read()
            if not grabbed:
                raise RuntimeError(
                    "Could not read a frame from VideoCapture.")
            (h, w) = frame.shape[:2]

            # Store coordinates of all windows to be checked at every frame.
            self.windows = slidingWindow((w, h),
                                         init_size=self.init_size,
                                         x_overlap=self.x_overlap,
                                         y_step=self.y_step,
                                         x_range=self.x_range,
                                         y_range=self.y_range,
                                         scale=self.scale)

            if min_bbox is None:
                min_bbox = (int(0.02 * w), int(0.02 * h))

            # Heatmap inset size as (width, height).
            inset_size = (int(draw_heatmap_size * w),
                          int(draw_heatmap_size * h))

            if write:
                vidFilename = datetime.now().strftime("%Y%m%d%H%M") + ".avi"
                fourcc = cv2.VideoWriter_fourcc('M', 'J', 'P', 'G')
                writer = cv2.VideoWriter(vidFilename, fourcc, write_fps,
                                         (w, h))

            # Compute the heatmap for each frame and store the last
            # num_frames heatmaps in deque last_N_frames. At each frame, sum
            # the deque to compute summed_heatmap. After thresholding, label
            # blobs in summed_heatmap with scipy's label function and store
            # them in heatmap_labels.
            summed_heatmap = np.zeros((h, w), dtype=np.uint8)
            last_N_frames = deque(maxlen=num_frames)
            # np.int was removed from NumPy; use an explicit integer dtype.
            heatmap_labels = np.zeros((h, w), dtype=np.int32)

            # Weights for the frames in last_N_frames for producing
            # summed_heatmap. Recent frames are weighted more heavily than
            # older frames.
            weights = np.linspace(1 / (num_frames + 1), 1, num_frames)
            dilate_kernel = np.ones((7, 7), dtype=np.uint8)
            heat_increment = 10

            while True:
                (grabbed, frame) = cap.read()
                if not grabbed:
                    break

                # Allocate a fresh heatmap per frame: the deque stores
                # references, so reusing one buffer would make every stored
                # "past" heatmap identical to the current one.
                current_heatmap = np.zeros((h, w), dtype=np.uint8)
                summed_heatmap[:] = 0
                for (x_upper, y_upper, x_lower,
                     y_lower) in self.classify(frame):
                    region = current_heatmap[y_upper:y_lower,
                                             x_upper:x_lower]
                    # Saturating add: clamp first so uint8 cannot wrap
                    # around to small values where many windows overlap.
                    region[region > 255 - heat_increment] = (
                        255 - heat_increment)
                    region += heat_increment

                last_N_frames.append(current_heatmap)
                for i, heatmap in enumerate(last_N_frames):
                    # cv2.add saturates at 255 instead of wrapping.
                    cv2.add(summed_heatmap,
                            (weights[i] * heatmap).astype(np.uint8),
                            dst=summed_heatmap)

                # Dilate the heatmap to merge nearby hot regions.
                cv2.dilate(summed_heatmap, dilate_kernel, dst=summed_heatmap)

                if draw_heatmap:
                    # The inset shows the heatmap before thresholding.
                    inset = cv2.resize(summed_heatmap,
                                       inset_size,
                                       interpolation=cv2.INTER_AREA)
                    inset = cv2.cvtColor(inset, cv2.COLOR_GRAY2BGR)
                    frame[:inset_size[1], :inset_size[0], :] = inset

                # Ignore heatmap pixels below threshold.
                summed_heatmap[summed_heatmap <= threshold] = 0

                # Label remaining blobs; with an array passed as output,
                # label() fills it in place and returns only the count.
                num_objects = label(summed_heatmap, output=heatmap_labels)

                # Determine the largest bounding box around each object.
                for obj in range(1, num_objects + 1):
                    (Y_coords, X_coords) = np.nonzero(heatmap_labels == obj)
                    # Plain ints keep cv2.rectangle happy across versions.
                    x_upper, y_upper = (int(X_coords.min()),
                                        int(Y_coords.min()))
                    x_lower, y_lower = (int(X_coords.max()),
                                        int(Y_coords.max()))

                    # Only draw box if object is larger than min bbox size.
                    if (x_lower - x_upper > min_bbox[0]
                            and y_lower - y_upper > min_bbox[1]):
                        cv2.rectangle(frame, (x_upper, y_upper),
                                      (x_lower, y_lower), (0, 255, 0), 6)

                if write:
                    writer.write(frame)

                if show_video:
                    cv2.imshow("Detection", frame)
                    # Wait for a key press so the viewer advances through
                    # the video manually, one frame at a time.
                    cv2.waitKey(0)
        finally:
            # Release resources even if detection fails part-way through.
            cap.release()
            if writer is not None:
                writer.release()