# Example #1
0
def check_on_train_clip(video_id, weights, suffix, is_test=False):
    """Run the SSD detector over one clip and save an annotated image per frame.

    Builds the network (the ResNet variant when ``'resnet'`` occurs in
    ``weights``), loads ``weights``, predicts on every batch yielded by
    ``SSDDataset.generate_x_for_train_video_id`` and saves each frame, with
    its detections drawn by ``display_img_with_rects``, as
    ``../output/predictions_ssd/<video_id><suffix>/<NNNN>.jpg``.
    Output numbering starts at 0001 and follows the order in which frames
    are yielded by the dataset generator.

    Args:
        video_id: Identifier of the clip to process.
        weights: Path of the weights file to load; the substring ``'resnet'``
            selects the ResNet architecture.
        suffix: Text appended to ``video_id`` to form the output directory.
        is_test: Forwarded to ``SSDDataset``.
    """
    if 'resnet' in weights:
        model = build_resnet(input_shape)
    else:
        model = build_model(input_shape)

    model.compile(loss=MultiboxLoss(NUM_CLASSES, neg_pos_ratio=2.0, pos_cost_multiplier=1.0).compute_loss,
                  optimizer=Adam(lr=1e-3))
    model.load_weights(weights)
    model.summary()

    priors = priors_from_model(model)
    bbox_util = BBoxUtility(NUM_CLASSES, priors)
    dataset = SSDDataset(bbox_util=bbox_util, is_test=is_test)

    outdir = '../output/predictions_ssd/' + video_id + suffix
    os.makedirs(outdir, exist_ok=True)
    batch_size = 4

    pool = ThreadPool(processes=8)
    try:
        frame_id = 0
        batches = dataset.generate_x_for_train_video_id(video_id=video_id,
                                                        batch_size=batch_size,
                                                        pool=pool)
        for x_batch, _frames in batches:
            predictions = model.predict(x_batch)
            results = bbox_util.detection_out(predictions)
            # The final batch may be shorter than batch_size, so iterate over
            # the number of predictions actually returned.
            for batch_id in range(predictions.shape[0]):
                print(results[batch_id])
                display_img_with_rects(img=utils.preprocessed_input_to_img_resnet(x_batch[batch_id]) * 255,
                                       results=results,
                                       res_idx=batch_id)
                plt.savefig('{}/{:04}.jpg'.format(outdir, frame_id+1))
                plt.clf()
                frame_id += 1
                print(frame_id)
    finally:
        # Release the worker threads even when prediction or plotting fails.
        pool.close()
        pool.join()
# Example #2
0
def generate_predictions_on_train_clips(weights, suffix, from_idx, count, use_requested_frames=False, is_test=False):
    """Save SSD detections as ``.npy`` files for a slice of the dataset's clips.

    The clips are the sorted keys of ``dataset.video_clips``; the slice
    ``[from_idx:from_idx + count]`` is processed. For each clip, detections of
    frame ``f`` are written to ``<root>/<suffix>/<video_id>/<f+1:04>.npy``
    where ``<root>`` is ``../output/predictions_ssd_roi2_test`` when
    ``is_test`` is true and ``../output/predictions_ssd_roi2`` otherwise.
    Frames whose output file already exists are skipped, so an interrupted
    run can be resumed.

    Args:
        weights: Path of the weights file to load; the substring ``'resnet'``
            selects the ResNet architecture.
        suffix: Sub-directory name inserted into the output path.
        from_idx: Index of the first clip (in sorted order) to process.
        count: Number of clips to process.
        use_requested_frames: When true, only the frames listed for each clip
            in ``../output/used_frames.pkl`` are predicted; otherwise every
            frame of the clip is.
        is_test: Forwarded to ``SSDDataset`` and used to pick the output root.
    """
    if 'resnet' in weights:
        model = build_resnet(input_shape)
    else:
        model = build_model(input_shape)

    model.compile(loss=MultiboxLoss(NUM_CLASSES, neg_pos_ratio=2.0, pos_cost_multiplier=1.0).compute_loss,
                  optimizer=Adam(lr=1e-3))
    model.load_weights(weights)
    model.summary()

    priors = priors_from_model(model)
    bbox_util = BBoxUtility(NUM_CLASSES, priors)
    dataset = SSDDataset(bbox_util=bbox_util, is_test=is_test)

    items = sorted(dataset.video_clips.keys())
    batch_size = 4

    # Loaded lazily, once, the first time a clip needs it.
    requested_frames = None

    pool = ThreadPool(processes=4)
    executor = concurrent.futures.ThreadPoolExecutor(max_workers=2)
    try:
        for video_id in items[from_idx: from_idx+count]:
            print(video_id)
            if is_test:
                outdir = '../output/predictions_ssd_roi2_test/{}/{}'.format(suffix, video_id)
            else:
                outdir = '../output/predictions_ssd_roi2/{}/{}'.format(suffix, video_id)
            os.makedirs(outdir, exist_ok=True)

            if use_requested_frames:
                if requested_frames is None:
                    # NOTE: pickle executes arbitrary code on load; this file
                    # must only ever come from a trusted local pipeline step.
                    with open('../output/used_frames.pkl', 'rb') as frames_file:
                        requested_frames = pickle.load(frames_file)
                frames = requested_frames[video_id]
            else:
                frames = list(range(len(dataset.video_clips[video_id])))

            # Skip frames that already have a saved prediction.
            new_frames = [frame for frame in frames
                          if not os.path.exists('{}/{:04}.npy'.format(outdir, frame+1))]
            if not new_frames:
                continue

            batches = dataset.generate_x_for_train_video_id(video_id=video_id,
                                                            batch_size=batch_size,
                                                            frames=new_frames,
                                                            pool=pool)
            for x_batch, used_frames in utils.parallel_generator(batches, executor=executor):
                predictions = model.predict(x_batch)
                results = bbox_util.detection_out(predictions)
                for batch_id in range(predictions.shape[0]):
                    np.save('{}/{:04}.npy'.format(outdir, used_frames[batch_id]+1), results[batch_id])
                    print(used_frames[batch_id])
    finally:
        # Release worker threads even when a clip fails part-way through.
        executor.shutdown()
        pool.close()
        pool.join()
# Example #3
0
def check(weights):
    """Visually inspect SSD predictions on batches from ``generate_ssd``.

    Builds the model with ``build_model``, loads ``weights`` and, for every
    batch yielded by ``SSDDataset.generate_ssd`` (non-training mode, always
    shuffled), shows each image with its detections via
    ``display_img_with_rects`` and ``plt.show()``.

    Args:
        weights: Path of the weights file to load.
    """
    model = build_model(input_shape)

    model.compile(loss=MultiboxLoss(NUM_CLASSES, neg_pos_ratio=2.0, pos_cost_multiplier=1.0).compute_loss,
                  optimizer=Adam(lr=1e-3))
    model.load_weights(weights)
    model.summary()

    priors = priors_from_model(model)
    bbox_util = BBoxUtility(NUM_CLASSES, priors)
    dataset = SSDDataset(bbox_util=bbox_util)

    batch_size = 4
    batches = dataset.generate_ssd(batch_size=batch_size, is_training=False, verbose=True, always_shuffle=True)
    for x_batch, _y_batch in batches:
        predictions = model.predict(x_batch)
        results = bbox_util.detection_out(predictions)
        # Iterate over the predictions actually returned rather than a fixed
        # count, so a short batch does not raise IndexError.
        for batch_id in range(predictions.shape[0]):
            display_img_with_rects(img=utils.preprocessed_input_to_img_resnet(x_batch[batch_id])*255,
                                   results=results,
                                   res_idx=batch_id)
            plt.show()
# Example #4
0
def _main():
    """Detect VOC objects in a user's profile images and save annotated copies.

    Parses ``--login.user`` / ``--login.password`` from the command line,
    obtains image paths from ``_profile_images``, loads them with
    ``_load_images`` and runs the SSD model returned by ``_load_model``.
    For every image that has at least one detection with confidence >= 0.6,
    ``<image index>.png`` is written to the current directory with *all* of
    those detections drawn on it (box outline plus ``"<score>, <class>"``).
    Images without such detections produce no file.
    """
    parser = argparse.ArgumentParser(description='instagram post describer')
    parser.add_argument('--login.user',
                        dest='username',
                        help='instagram username')
    parser.add_argument('--login.password',
                        dest='password',
                        help='instagram password')

    args = parser.parse_args()
    # NOTE: args.password is accepted on the command line but is not used by
    # anything in this function.
    image_paths = _profile_images(args.username)
    images, inputs = _load_images(image_paths)

    voc_classes = [
        'Aeroplane', 'Bicycle', 'Bird', 'Boat', 'Bottle', 'Bus', 'Car', 'Cat',
        'Chair', 'Cow', 'Diningtable', 'Dog', 'Horse', 'Motorbike', 'Person',
        'Pottedplant', 'Sheep', 'Sofa', 'Train', 'Tvmonitor'
    ]
    # Label 0 is the background class, hence the extra slot.
    NUM_CLASSES = len(voc_classes) + 1
    bbox_util = BBoxUtility(NUM_CLASSES)
    model = _load_model(NUM_CLASSES)
    model.summary()

    preds = model.predict(inputs, batch_size=1, verbose=1)
    results = bbox_util.detection_out(preds)

    conf_threshold = 0.6
    # One colour per label index (21 entries), computed once for all images.
    colors = plt.cm.hsv(np.linspace(0, 1, NUM_CLASSES)).tolist()

    for img_idx, img in enumerate(images):
        detections = results[img_idx]
        # Guard against an image with no detections at all before indexing
        # columns, as DetectCars.search_cars does.
        if len(detections) == 0:
            continue
        detections = np.asarray(detections)
        # Columns: label, confidence, xmin, ymin, xmax, ymax.
        confident = detections[detections[:, 1] >= conf_threshold]
        if len(confident) == 0:
            continue

        # Build the output image once per input so that every box is kept;
        # recreating it per detection would leave only the last box.
        annotated = PIL.Image.fromarray(np.uint8(img))
        draw = ImageDraw.Draw(annotated)
        img_height, img_width = img.shape[0], img.shape[1]

        for label, score, rel_xmin, rel_ymin, rel_xmax, rel_ymax in confident[:, :6]:
            # Box coordinates are multiplied by the image size to get pixels.
            xmin = int(round(rel_xmin * img_width))
            ymin = int(round(rel_ymin * img_height))
            xmax = int(round(rel_xmax * img_width))
            ymax = int(round(rel_ymax * img_height))
            label = int(label)
            label_name = voc_classes[label - 1]
            display_txt = '{:0.2f}, {}'.format(score, label_name)
            rgba = colors[label]
            color = (int(255 * rgba[0]), int(255 * rgba[1]),
                     int(255 * rgba[2]))
            draw.rectangle(((xmin, ymin), (xmax, ymax)), outline=color)
            draw.text((xmin, ymin), display_txt)

        annotated.save('{}.png'.format(img_idx))
class DetectCars(object):
    """Detect cars with a trained SSD model and draw heatmap-filtered boxes.

    Arguments:
        class_names: A list of strings, each containing the name of a class.
                     The first name should be that of the background class,
                     which is not used.
        model:       An SSD model. It should already be trained.
        model_input_shape: The shape that the model expects for its input,
                     as a (height, width, channels) tuple, for example
                     (300, 300, 3).
        image_shape: The shape of the input image, as a
                     (height, width, channels) tuple, for example
                     (720, 1280, 3).
    """

    # Number of most recent car boxes remembered across calls to image_prcess.
    HISTORY_SIZE = 10

    def __init__(self, class_names, model, model_input_shape, image_shape):
        self.class_names = class_names
        self.num_classes = len(class_names)
        self.model = model
        self.bbox_util = BBoxUtility(self.num_classes)
        self.model_input_shape = model_input_shape
        # image_shape is (height, width, channels): height is index 0 and
        # width is index 1.
        self.image_height = image_shape[0]
        self.image_width = image_shape[1]
        # Rolling history of [xmin, ymin, xmax, ymax] boxes, oldest first.
        self.hit_window_list = []

    def search_cars(self, rgb_image, conf_thresh=0.6):
        """Return pixel boxes of cars detected on ``rgb_image``.

        # Arguments
        rgb_image: Image on which the SSD model searches for cars.
        conf_thresh: Confidence threshold; detections below it are dropped.

        # Returns
        A list of ``[xmin, ymin, xmax, ymax]`` lists in pixels, scaled by the
        ``image_shape`` given to the constructor. Empty when nothing is found.
        """
        # cv2.resize expects the target size as (width, height).
        model_height = self.model_input_shape[0]
        model_width = self.model_input_shape[1]
        model_image = cv2.resize(rgb_image, (model_width, model_height))

        # Predict on a batch that holds just this one frame.
        batch = np.array([image.img_to_array(model_image)])
        y = self.model.predict(preprocess_input(batch))

        results = self.bbox_util.detection_out(y)

        boxes_list = []
        if len(results) == 0 or len(results[0]) == 0:
            return boxes_list

        # Only one frame was submitted, so only results[0] is relevant.
        # Columns: label, confidence, xmin, ymin, xmax, ymax.
        detections = np.asarray(results[0])
        confident = detections[detections[:, 1] >= conf_thresh]

        for class_num, _conf, rel_xmin, rel_ymin, rel_xmax, rel_ymax in confident[:, :6]:
            if self.class_names[int(class_num)] != 'car':
                continue
            boxes_list.append([
                int(round(rel_xmin * self.image_width)),
                int(round(rel_ymin * self.image_height)),
                int(round(rel_xmax * self.image_width)),
                int(round(rel_ymax * self.image_height)),
            ])
        return boxes_list

    ############################# Augmented Display #############################

    def add_heat(self, orignal_image, hit_windows):
        '''
        Build a heatmap counting how many hit windows cover each pixel.

        orignal_image: array whose first two dimensions give the heatmap size.
        hit_windows: iterable of (xmin, ymin, xmax, ymax) boxes in pixels.
        Returns a float array with the image's height and width.
        '''
        # np.float was removed from NumPy; use the explicit float64 dtype.
        heatmap = np.zeros(orignal_image.shape[:2], dtype=np.float64)
        for xmin, ymin, xmax, ymax in hit_windows:
            # Add 1 to every pixel inside the box (max edges exclusive).
            heatmap[int(ymin):int(ymax), int(xmin):int(xmax)] += 1
        return heatmap

    def filter_heatmap(self, heatmap, threshold):
        '''
        Threshold the heatmap to suppress weakly supported regions.

        Pixels strictly above ``threshold`` become 255, all others 0.
        Returns a new float array; ``heatmap`` is not modified.
        '''
        return np.where(heatmap > threshold, 255.0, 0.0)

    def label_bboxes(self, filtered_heatmap):
        '''
        Return one bounding box per connected region of the filtered heatmap.

        Each box is ((xmin, ymin), (xmax, ymax)) with plain int coordinates,
        both corners inclusive.
        '''
        labels = label(filtered_heatmap)
        labeled_map, region_count = labels[0], labels[1]
        bbox_list = []
        for car_number in range(1, region_count + 1):
            # Row (y) and column (x) indices of the pixels in this region.
            ys, xs = (labeled_map == car_number).nonzero()
            # Plain ints keep the corners usable as cv2 point arguments.
            bbox_list.append(((int(np.min(xs)), int(np.min(ys))),
                              (int(np.max(xs)), int(np.max(ys)))))
        return bbox_list

    def augmented_display(self, orignal_image, hit_windows, threshold=1):
        '''
        Draw heatmap-filtered car boxes on a copy of the image.

        orignal_image: image to annotate (left unmodified).
        hit_windows: iterable of (xmin, ymin, xmax, ymax) boxes.
        threshold: a pixel must be covered by more than this many windows
                   to count as part of a car.
        Returns (annotated image, raw heatmap, filtered heatmap).
        '''
        heatmap = self.add_heat(orignal_image, hit_windows)
        # Thresholding helps remove false positives.
        filtered_heatmap = self.filter_heatmap(heatmap, threshold=threshold)
        bbox_list = self.label_bboxes(filtered_heatmap)
        draw_img = np.copy(orignal_image)

        for top_left, bottom_right in bbox_list:
            cv2.rectangle(draw_img, top_left, bottom_right, (0, 255, 0), 3)
        return draw_img, heatmap, filtered_heatmap

    def image_prcess(self, rgb_image):
        '''
        Process a single image: detect cars, update the rolling history of
        hit windows and return the image annotated from that history.

        When the history is empty the input image is returned unchanged.
        '''
        for xmin, ymin, xmax, ymax in self.search_cars(rgb_image):
            self.hit_window_list.append([xmin, ymin, xmax, ymax])
        # Keep only the newest HISTORY_SIZE boxes (no-op while under the cap).
        del self.hit_window_list[:-self.HISTORY_SIZE]

        if len(self.hit_window_list) > 0:
            rgb_image, _heatmap, _filtered_heatmap = self.augmented_display(
                rgb_image, self.hit_window_list)
        return rgb_image
# Load every file in base_path. `images` keeps a channel-reversed copy of each
# image (cv2.imread loads BGR, so this is RGB); `inputs` holds the resized,
# mean-subtracted float32 copy that is fed to the model.
inputs = []
images = []
image_names = os.listdir(base_path)
for image_name in image_names:
    # os.path.join also works when base_path has no trailing separator.
    image = cv2.imread(os.path.join(base_path, image_name))
    images.append(image[:, :, ::-1])
    image = image.astype(np.float32)
    # cv2.resize takes the target size as (width, height).
    image = cv2.resize(image, (img_width, img_height))
    # Subtract the per-channel means (in the loaded channel order).
    image[:, :, 0] -= 104.0
    image[:, :, 1] -= 117.0
    image[:, :, 2] -= 124.0
    inputs.append(image)
inputs = np.asarray(inputs)

predictions = model.predict(inputs, batch_size=1, verbose=1)
results = bbox_util.detection_out(predictions)

for i, img in enumerate(images):
    det_label = results[i][:, 0]
    det_conf = results[i][:, 1]
    det_xmin = results[i][:, 2]
    det_ymin = results[i][:, 3]
    det_xmax = results[i][:, 4]
    det_ymax = results[i][:, 5]

    top_indices = [i for i, conf in enumerate(det_conf) if conf >= confidence]
    top_conf = det_conf[top_indices]
    top_label_indices = det_label[top_indices].tolist()
    top_xmin = det_xmin[top_indices]
    top_ymin = det_ymin[top_indices]
    top_xmax = det_xmax[top_indices]