Example #1
import cv2
from torchvision import transforms
from detecto.utils import normalize_transform


def detect_video(model, input_file, output_file, fps=30, score_filter=0.6):
    """Takes in a video and produces an output video with object detection
    run on it (i.e. displays boxes around detected objects in real-time).
    Output videos should have the .avi file extension. Note: some apps,
    such as macOS's QuickTime Player, have difficulty viewing these
    output videos. It's recommended that you download and use
    `VLC <https://www.videolan.org/vlc/index.html>`_ if this occurs.


    :param model: The trained model with which to run object detection.
    :type model: detecto.core.Model
    :param input_file: The path to the input video.
    :type input_file: str
    :param output_file: The name of the output file. Should have a .avi
        file extension.
    :type output_file: str
    :param fps: (Optional) Frames per second of the output video.
        Defaults to 30.
    :type fps: int
    :param score_filter: (Optional) Minimum score required to show a
        prediction. Defaults to 0.6.
    :type score_filter: float

    **Example**::

        >>> from detecto.core import Model
        >>> from detecto.visualize import detect_video

        >>> model = Model.load('model_weights.pth', ['tick', 'gate'])
        >>> detect_video(model, 'input_vid.mp4', 'output_vid.avi', score_filter=0.7)
    """

    # Read in the video
    video = cv2.VideoCapture(input_file)

    # Video frame dimensions
    frame_width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Scale down frames when passing into model for faster speeds
    scaled_size = 800
    scale_down_factor = min(frame_height, frame_width) / scaled_size

    # The VideoWriter with which we'll write our video with the boxes and labels
    # Parameters: filename, fourcc, fps, frame_size
    out = cv2.VideoWriter(output_file, cv2.VideoWriter_fourcc(*'DIVX'), fps,
                          (frame_width, frame_height))

    # Transform to apply on individual frames of the video
    transform_frame = transforms.Compose([  # TODO Issue #16
        transforms.ToPILImage(),
        transforms.Resize(scaled_size),
        transforms.ToTensor(),
        normalize_transform(),
    ])

    # Loop through every frame of the video
    while True:
        ret, frame = video.read()
        # Stop the loop when we're done with the video
        if not ret:
            break

        # The transformed frame is what we'll feed into our model
        # transformed_frame = transform_frame(frame)
        transformed_frame = frame  # TODO: Issue #16
        predictions = model.predict(transformed_frame)

        # Add the top prediction of each class to the frame
        for label, box, score in zip(*predictions):
            if score < score_filter:
                continue

            # Since the predictions are for scaled down frames,
            # we need to increase the box dimensions
            # box *= scale_down_factor  # TODO Issue #16

            # Create the box around each object detected
            # Parameters: frame, (start_x, start_y), (end_x, end_y), (r, g, b), thickness
            cv2.rectangle(frame, (box[0], box[1]), (box[2], box[3]),
                          (255, 0, 0), 3)

            # Write the label and score for the boxes
            # Parameters: frame, text, (start_x, start_y), font, font scale, (r, g, b), thickness
            cv2.putText(frame, '{}: {}'.format(label, round(score.item(), 2)),
                        (box[0], box[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, 1,
                        (255, 0, 0), 3)

        # Write this frame to our video file
        out.write(frame)

        # If the 'q' key is pressed, break from the loop
        key = cv2.waitKey(1) & 0xFF
        if key == ord('q'):
            break

    # When finished, release the video capture and writer objects
    video.release()
    out.release()

    # Close all the frames
    cv2.destroyAllWindows()
Example #2
from detecto import core, utils, visualize
from torchvision import transforms

augmentations = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomHorizontalFlip(0.5),
    transforms.ColorJitter(saturation=0.5),
    transforms.ToTensor(),
    utils.normalize_transform(),
])

val_dataset = core.Dataset('/dataset/validation_images/')
dataset = core.Dataset('/dataset/train_images/', transform=augmentations)
model = core.Model(['rust'])
loader = core.DataLoader(dataset, batch_size=2, shuffle=True)

losses = model.fit(loader,
                   val_dataset,
                   epochs=10,
                   learning_rate=0.001,
                   lr_step_size=5,
                   verbose=True)

model.save('model/model_weights.pth')
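
Once training finishes, the saved weights can be reloaded for inference. Below is a minimal sketch using detecto's standard API; the image path is hypothetical.

from detecto import core, utils, visualize

# Reload the weights saved above, using the same class list
model = core.Model.load('model/model_weights.pth', ['rust'])

# Run a prediction on a single image and display the result
image = utils.read_image('example_rust_image.jpg')  # hypothetical path
labels, boxes, scores = model.predict(image)
visualize.show_labeled_image(image, boxes, labels)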
Example #3
import cv2
import numpy as np
from torchvision import transforms
from detecto import utils


def procesVideo(model, input_file, output_file, fps=30):

    video = cv2.VideoCapture(input_file)

    # Video frame dimensions
    frame_width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Scale down frames when passing into model for faster speeds
    scaled_size = 800
    scale_down_factor = min(frame_height, frame_width) / scaled_size

    out = cv2.VideoWriter(output_file, cv2.VideoWriter_fourcc(*'DIVX'), fps,
                          (frame_width, frame_height))

    # Transform to apply on individual frames of the video
    transform_frame = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize(scaled_size),
        transforms.ToTensor(),
        utils.normalize_transform(),
    ])

    while True:
        moving_cars = []  # Bounding boxes of all moving objects in this frame
        ret, frame = video.read()
        # Stop the loop when we're done with the video
        if not ret:
            break

        # Frame area, used to filter out contours too small to be cars
        frame_area = frame_height * frame_width
        mask = subtractor.apply(frame)  # Background subtraction to isolate motion
        newmask = cv2.medianBlur(mask, 3)  # Remove salt-and-pepper noise
        # Find the contours of the moving objects in the foreground mask
        contours, hierarchy = cv2.findContours(newmask.copy(), cv2.RETR_EXTERNAL,
                                               cv2.CHAIN_APPROX_SIMPLE)
        for c in contours:
            (x, y, w, h) = cv2.boundingRect(c)
            area = (w + 1) * (h + 1)
            # Keep only contours that are large enough relative to the frame
            if area > frame_area * 0.001:
                moving_cars.append([(x, y), (x + w, y + h)])

        # Scale the frame down so the model can process it faster
        transformed_frame = transform_frame(frame)
        # Predictions are returned as PyTorch tensors
        labels, boxes, scores = model.predict(transformed_frame)
        # Scale the boxes back up so they map onto the original frame
        boxes *= scale_down_factor

        for box in boxes:
            colour_car = False  # Whether this detection overlaps a moving object
            box = np.array(box.tolist(), dtype=int)  # Torch tensor -> int NumPy array
            for car in moving_cars:
                # compare() checks whether the two rectangles overlap
                if compare(box, car):
                    colour_car = True
            if colour_car:
                # Draw a red box around each moving car
                cv2.rectangle(frame, (box[0], box[1]), (box[2], box[3]), red, 2)

        for spot in parkingspace_Coords:  # Go through all the saved parking spots
            colour_spot = False  # Whether a detected car occupies this spot
            for box in boxes:
                box = np.array(box.tolist(), dtype=int)  # Torch tensor -> int NumPy array
                # comparesp() checks whether a car box overlaps the parking spot
                if comparesp(box, spot):
                    colour_spot = True
            if colour_spot:
                # Occupied parking spot: draw in blue
                cv2.rectangle(frame, (spot[0][0], spot[0][1]),
                              (spot[2][0], spot[2][1]), blue, 2)
            else:
                # Empty parking spot: draw in white
                cv2.rectangle(frame, (spot[0][0], spot[0][1]),
                              (spot[2][0], spot[2][1]), white, 2)

        out.write(frame)  # Write the annotated frame to the output video

        # If the 'q' key is pressed, break from the loop
        key = cv2.waitKey(1) & 0xFF
        if key == ord('q'):
            break
    # When finished, release the video capture and writer objects
    video.release()
    out.release()
    # Close all the frames
    cv2.destroyAllWindows()
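
The snippet above also depends on project-specific globals that are not shown in the example (subtractor, parkingspace_Coords, the colour constants, and the compare/comparesp helpers). Purely as an illustration of what the two overlap checks might look like, here is a hypothetical axis-aligned rectangle intersection test; the real implementations are not part of the example.

def rects_overlap(box, rect):
    """Hypothetical helper: True if the detection box (x1, y1, x2, y2)
    overlaps the rectangle given as [(x1, y1), (x2, y2)]."""
    (rx1, ry1), (rx2, ry2) = rect
    return not (box[2] < rx1 or rx2 < box[0] or
                box[3] < ry1 or ry2 < box[1])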
Example #4
    def trainMountingConfigClassifier(self,
                                      train_path,
                                      val_path,
                                      device=torch.device('cuda')):
        """
        This function uses Faster R-CNN ResNet50 FPN as the base network
        and as a transfer learning framework to train a model that performs
        object detection on the mounting configuration of solar arrays. It
        uses the training data to locate and classify mounting configuration
        of the solar installation. It uses the validation data to prevent
        overfitting and to test the prediction on the fly.

        Parameters
        -----------
        train_path: string
            This is the path to the folder that contains the training images
            Note that the directory must be structured in this format:
                    train_path/
                        ...images/
                            ......a_image_1.png
                            ......a_image_2.png
                        ...annotations/
                            ......b_image_1.xml
                            ......b_image_2.xml
        val_path: string
            This is the path to the folder that contains the validation images
            Note that the directory must be structured in this format:
                    val_path/
                        ...images/
                            ......a_image_1.png
                            ......a_image_2.png
                        ...annotations/
                            ......b_image_1.xml
                            ......b_image_2.xml
        device: torch.device
            This argument is passed to the Model() class in Detecto.
            It determines how to run the model: either on GPU via Cuda
            (default setting), or on CPU. Please note that running the
            model on GPU results in significantly faster training times.

        Returns
        -----------
        model: detecto.core.Model object
            The final trained mounting configuration object detection
            model.
        """
        # Convert the data set combinations (png + xml) to a CSV record.
        val_labels_path = (val_path + '/annotations.csv')
        train_labels_path = (train_path + '/annotations.csv')
        utils.xml_to_csv(train_path + '/annotations/', train_labels_path)
        utils.xml_to_csv(val_path + '/annotations/', val_labels_path)
        # Custom oversampling to balance out our classes
        train_data = pd.read_csv(train_labels_path)
        class_count = pd.Series(train_data['class'].value_counts())
        train_data_resampled = train_data.copy()
        for index, count in class_count.items():
            number_times_resample = class_count.max() - count
            # Randomly sample rows of this class X times
            class_index_list = list(
                train_data[train_data['class'] == index].index)
            # Resample the list with replacement
            idx_to_duplicate = choices(class_index_list,
                                       k=number_times_resample)
            for idx in idx_to_duplicate:
                dup = train_data.loc[idx]
                # Append the duplicated row to the dataframe
                train_data_resampled = pd.concat(
                    [train_data_resampled, dup.to_frame().T],
                    ignore_index=True)
        # Reindex after all of the duplicates have been added
        train_data_resampled = train_data_resampled.reset_index(drop=True)
        # Re-write the resampled data set
        train_data_resampled.to_csv(train_labels_path, index=False)
        custom_transforms = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize(800),
            transforms.ToTensor(),
            utils.normalize_transform()
        ])
        # Load in the training and validation data sets
        dataset = core.Dataset(train_labels_path,
                               train_path + '/images',
                               transform=custom_transforms)
        val_dataset = core.Dataset(val_labels_path, val_path + '/images')
        # Customize training options
        loader = core.DataLoader(dataset,
                                 batch_size=self.batch_size,
                                 shuffle=True)
        model = core.Model([
            "ground-fixed", "carport-fixed", "rooftop-fixed",
            "ground-single_axis_tracker"
        ],
                           device=device)
        losses = model.fit(loader,
                           val_dataset,
                           epochs=self.no_of_epochs,
                           learning_rate=self.learning_rate,
                           verbose=True)
        plt.plot(losses)
        plt.show()
        return model
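
The class that owns trainMountingConfigClassifier is not shown in this example, so the sketch below is only a hypothetical illustration of how the method might be called; the class name and constructor are assumptions, and the attribute names (batch_size, no_of_epochs, learning_rate) are taken from the method body.

import torch

# Hypothetical trainer class and hyperparameters; only the method itself
# appears in the example above.
trainer = MountingConfigTrainer(batch_size=8, no_of_epochs=10, learning_rate=0.0002)
model = trainer.trainMountingConfigClassifier(train_path='data/train',
                                              val_path='data/val',
                                              device=torch.device('cuda'))
model.save('mounting_config_model.pth')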
Example #5
bar = progressbar.ProgressBar(current_value=current_frame, max_value=frame_count).start()

# Scale down frames when passing into model for faster speeds
scaled_size = 256
scale_down_factor = min(frame_height, frame_width) / scaled_size

# The VideoWriter with which we'll write our video with the boxes and labels
# Parameters: filename, fourcc, fps, frame_size
out = cv2.VideoWriter(output_file, cv2.VideoWriter_fourcc(*'mp4v'), fps, (frame_width, frame_height))

# Transform to apply on individual frames of the video
transform_frame = transforms.Compose([  # TODO Issue #16
    transforms.ToPILImage(),
    transforms.Resize(scaled_size),
    transforms.ToTensor(),
    normalize_transform(),
])

# Loop through every frame of the video
while True:
    ret, frame = video.read()
    # Stop the loop when we're done with the video
    if not ret:
        break

    # The transformed frame is what we'll feed into our model
    transformed_frame = transform_frame(frame)
    predictions = model.predict(transformed_frame)

    # Add the top prediction of each class to the frame
    for label, box, score in zip(*predictions):
Example #6
import matplotlib.pyplot as plt
from torchvision import transforms
from detecto.utils import normalize_transform
from detecto.core import Dataset, DataLoader, Model

IMAGE_DIR = '/Users/noahmushkin/codes/selenium-python-scraping/data/images/cameras/'
LABEL_DIR = '/Users/noahmushkin/codes/selenium-python-scraping/data/labeled_cams_convert/'

img_transform = transforms.Compose([
    transforms.ToPILImage(),
    # Note: images are resized so their smaller edge is 400 pixels
    # (smaller images are scaled up, larger ones scaled down)
    transforms.Resize(400),
    transforms.RandomHorizontalFlip(0.5),
    transforms.ColorJitter(saturation=0.2),
    transforms.ToTensor(),  # required
    normalize_transform(),  # required
])

dataset = Dataset(LABEL_DIR, IMAGE_DIR, transform=img_transform)
labels = ['camera']
model = Model(classes=labels)
loader = DataLoader(dataset, batch_size=32, shuffle=True)
losses = model.fit(loader, epochs=10, learning_rate=0.005)
plt.plot(losses)
plt.show()
model.save('cam_model.pth')