Exemple #1
0
def check_on_train_clip(video_id, weights, suffix, is_test=False):
    """Run the detector over one video clip and save annotated frames as JPEGs.

    The model architecture is chosen from the weights file name (ResNet when
    it contains 'resnet'), the weights are loaded, and every frame produced by
    the dataset generator is rendered with its detections and written to
    ``../output/predictions_ssd/<video_id><suffix>/NNNN.jpg`` (1-based index).

    Args:
        video_id: Identifier of the clip to process.
        weights: Path to the weights file to load.
        suffix: String appended to the output directory name.
        is_test: Forwarded to ``SSDDataset``.
    """
    if 'resnet' in weights:
        model = build_resnet(input_shape)
    else:
        model = build_model(input_shape)

    model.compile(loss=MultiboxLoss(NUM_CLASSES, neg_pos_ratio=2.0, pos_cost_multiplier=1.0).compute_loss,
                  optimizer=Adam(lr=1e-3))
    model.load_weights(weights)
    model.summary()

    priors = priors_from_model(model)
    bbox_util = BBoxUtility(NUM_CLASSES, priors)
    dataset = SSDDataset(bbox_util=bbox_util, is_test=is_test)

    outdir = '../output/predictions_ssd/' + video_id + suffix
    os.makedirs(outdir, exist_ok=True)
    batch_size = 4

    pool = ThreadPool(processes=8)
    try:
        frame_id = 0
        batches = dataset.generate_x_for_train_video_id(video_id=video_id, batch_size=batch_size, pool=pool)
        # The frame indices yielded with each batch are not needed here:
        # output files are numbered with a running counter instead.
        for x_batch, _frames in batches:
            predictions = model.predict(x_batch)
            results = bbox_util.detection_out(predictions)
            for batch_id in range(predictions.shape[0]):
                print(results[batch_id])
                display_img_with_rects(img=utils.preprocessed_input_to_img_resnet(x_batch[batch_id]) * 255,
                                       results=results,
                                       res_idx=batch_id)
                plt.savefig('{}/{:04}.jpg'.format(outdir, frame_id+1))
                plt.clf()
                frame_id += 1
                print(frame_id)
    finally:
        # Release the worker threads even if prediction or plotting fails.
        pool.close()
        pool.join()
 def __init__(self, class_names, model, model_input_shape, image_shape):
     """Store the detector configuration and start with an empty hit history.

     ``class_names``, ``model`` and ``model_input_shape`` are kept as given.
     Entry 0 of ``image_shape`` is stored as ``image_width`` and entry 1 as
     ``image_height``.
     """
     class_count = len(class_names)

     self.class_names = class_names
     self.num_classes = class_count
     self.model = model
     # The box utility is built from the number of classes only.
     self.bbox_util = BBoxUtility(class_count)
     self.model_input_shape = model_input_shape
     self.image_width, self.image_height = image_shape[0], image_shape[1]
     # Detection windows accumulated over time start out empty.
     self.hit_window_list = []
Exemple #3
0
def generate_predictions_on_train_clips(weights, suffix, from_idx, count, use_requested_frames=False, is_test=False):
    """Run the detector over a slice of video clips and save per-frame results.

    Clips are taken from the sorted list of ``dataset.video_clips`` keys,
    starting at ``from_idx`` and covering ``count`` entries. For each frame a
    ``NNNN.npy`` file (1-based frame index) holding the detection output is
    written; frames whose file already exists are skipped, so an interrupted
    run can be resumed.

    Args:
        weights: Path to the weights file; a ResNet model is built when the
            path contains 'resnet'.
        suffix: Sub-directory name under the predictions directory.
        from_idx: Index of the first clip to process.
        count: Number of clips to process.
        use_requested_frames: When true, only the frames listed for each clip
            in ``../output/used_frames.pkl`` are processed; otherwise all
            frames of the clip are.
        is_test: Forwarded to ``SSDDataset`` and selects the test output
            directory.
    """
    if 'resnet' in weights:
        model = build_resnet(input_shape)
    else:
        model = build_model(input_shape)

    model.compile(loss=MultiboxLoss(NUM_CLASSES, neg_pos_ratio=2.0, pos_cost_multiplier=1.0).compute_loss,
                  optimizer=Adam(lr=1e-3))
    model.load_weights(weights)
    model.summary()

    priors = priors_from_model(model)
    bbox_util = BBoxUtility(NUM_CLASSES, priors)
    dataset = SSDDataset(bbox_util=bbox_util, is_test=is_test)

    items = sorted(dataset.video_clips.keys())
    batch_size = 4

    # Loaded lazily on first use, and only once for the whole run.
    requested_frames = None

    pool = ThreadPool(processes=4)
    executor = concurrent.futures.ThreadPoolExecutor(max_workers=2)
    try:
        for video_id in items[from_idx: from_idx+count]:
            print(video_id)
            if is_test:
                outdir = '../output/predictions_ssd_roi2_test/{}/{}'.format(suffix, video_id)
            else:
                outdir = '../output/predictions_ssd_roi2/{}/{}'.format(suffix, video_id)
            os.makedirs(outdir, exist_ok=True)

            if use_requested_frames:
                if requested_frames is None:
                    # NOTE: pickle is only safe on trusted, locally produced files.
                    with open('../output/used_frames.pkl', 'rb') as frames_file:
                        requested_frames = pickle.load(frames_file)
                frames = requested_frames[video_id]
            else:
                frames = range(len(dataset.video_clips[video_id]))

            # Only predict frames that have no saved result yet.
            new_frames = [frame for frame in frames
                          if not os.path.exists('{}/{:04}.npy'.format(outdir, frame+1))]
            if not new_frames:
                continue

            batches = dataset.generate_x_for_train_video_id(video_id=video_id,
                                                            batch_size=batch_size,
                                                            frames=new_frames,
                                                            pool=pool)
            for x_batch, used_frames in utils.parallel_generator(batches, executor=executor):
                predictions = model.predict(x_batch)
                results = bbox_util.detection_out(predictions)
                for batch_id in range(predictions.shape[0]):
                    np.save('{}/{:04}.npy'.format(outdir, used_frames[batch_id]+1), results[batch_id])
                    print(used_frames[batch_id])
    finally:
        # Release worker threads even when a clip fails part-way through.
        executor.shutdown()
        pool.close()
        pool.join()
Exemple #4
0
def check_dataset():
    """Show training samples with their boxes drawn, for visual inspection.

    For every batch from the dataset generator, the first image is displayed
    and each entry of the matching target list is outlined in yellow. The
    first four values of an entry are treated as fractional xmin, ymin, xmax,
    ymax and scaled by the image width/height.
    """
    model = build_model(input_shape)
    loss = MultiboxLoss(NUM_CLASSES, neg_pos_ratio=3.0, pos_cost_multiplier=1.2)
    model.compile(loss=loss.compute_loss, optimizer=Adam(lr=1e-4))
    bbox_util = BBoxUtility(NUM_CLASSES, priors_from_model(model))

    # Identity preprocessing: images are left as produced by the dataset so
    # they can be shown after dividing by 255.
    dataset = SSDDataset(bbox_util=bbox_util, preprocess_input=lambda x: x)

    samples = dataset.generate_ssd(batch_size=2, skip_assign_boxes=True, is_training=True, verbose=True)
    for images, ys in samples:
        first_image = images[0]
        print('min value:', np.min(first_image))
        print('max value:', np.max(first_image))

        plt.imshow(first_image / 255.)
        axes = plt.gca()
        for box in ys[0]:
            left = int(round(box[0] * first_image.shape[1]))
            top = int(round(box[1] * first_image.shape[0]))
            right = int(round(box[2] * first_image.shape[1]))
            bottom = int(round(box[3] * first_image.shape[0]))

            outline = plt.Rectangle((left, top),
                                    right - left + 1,
                                    bottom - top + 1,
                                    fill=False,
                                    edgecolor='yellow',
                                    linewidth=2)
            axes.add_patch(outline)
        plt.show()
Exemple #5
0
def check(weights):
    """Display detections on shuffled non-training batches.

    Builds the default model, loads ``weights`` and, for every batch from the
    dataset generator, shows each image with its detections in a matplotlib
    window.

    Args:
        weights: Path to the weights file to load.
    """
    model = build_model(input_shape)

    model.compile(loss=MultiboxLoss(NUM_CLASSES, neg_pos_ratio=2.0, pos_cost_multiplier=1.0).compute_loss,
                  optimizer=Adam(lr=1e-3))
    model.load_weights(weights)
    model.summary()

    priors = priors_from_model(model)
    bbox_util = BBoxUtility(NUM_CLASSES, priors)
    dataset = SSDDataset(bbox_util=bbox_util)

    for x_batch, y_batch in dataset.generate_ssd(batch_size=4, is_training=False, verbose=True, always_shuffle=True):
        predictions = model.predict(x_batch)
        results = bbox_util.detection_out(predictions)
        # Iterate over what was actually predicted rather than assuming a
        # full batch of four, so a short batch cannot index out of range.
        for batch_id in range(predictions.shape[0]):
            display_img_with_rects(img=utils.preprocessed_input_to_img_resnet(x_batch[batch_id])*255,
                                   results=results,
                                   res_idx=batch_id)
            plt.show()
Exemple #6
0
def train_resnet():
    """Train the ResNet-based SSD model with checkpointing and TensorBoard logs.

    Checkpoints go to ``../output/checkpoints/detect_ssd/ssd_resnet_720`` and
    TensorBoard logs to ``../output/logs/detect_ssd/ssd_resnet_720``; both
    directories are created when missing. Training runs for 50 epochs with a
    batch size of 8 for both training and validation.
    """
    model_name = 'ssd_resnet_720'
    checkpoints_dir = '../output/checkpoints/detect_ssd/' + model_name
    tensorboard_dir = '../output/logs/detect_ssd/' + model_name
    for directory in (checkpoints_dir, tensorboard_dir):
        os.makedirs(directory, exist_ok=True)

    model = build_resnet(input_shape=input_shape)
    multibox_loss = MultiboxLoss(NUM_CLASSES, neg_pos_ratio=2.0, pos_cost_multiplier=1.1)
    model.compile(loss=multibox_loss.compute_loss, optimizer=Adam(lr=3e-5))
    model.summary()
    # To resume from an earlier run, load a checkpoint here, e.g.
    # model.load_weights('../output/checkpoints/detect_ssd/ssd_resnet_720/checkpoint-best-053-0.1058.hdf5')

    bbox_util = BBoxUtility(NUM_CLASSES, priors_from_model(model))
    dataset = SSDDataset(bbox_util=bbox_util, preprocess_input=preprocess_input)

    batch_size = 8
    val_batch_size = 8
    nb_epoch = 50

    callbacks = [
        # Keeps only checkpoints that improve on the best one so far.
        ModelCheckpoint(checkpoints_dir + "/checkpoint-best-{epoch:03d}-{val_loss:.4f}.hdf5",
                        verbose=1,
                        save_weights_only=False,
                        save_best_only=True),
        # Writes a checkpoint every epoch regardless of the validation loss.
        ModelCheckpoint(checkpoints_dir + "/checkpoint-{epoch:03d}-{val_loss:.4f}.hdf5",
                        verbose=1,
                        save_weights_only=False,
                        period=1),
        TensorBoard(tensorboard_dir, histogram_freq=16, write_graph=False, write_images=False),
    ]

    train_batches = dataset.generate_ssd(batch_size=batch_size, is_training=True)
    val_batches = dataset.generate_ssd(batch_size=val_batch_size, is_training=False)

    model.fit_generator(train_batches,
                        steps_per_epoch=dataset.nb_train_samples // batch_size,
                        epochs=nb_epoch,
                        verbose=1,
                        callbacks=callbacks,
                        validation_data=val_batches,
                        validation_steps=dataset.nb_test_samples // val_batch_size,
                        initial_epoch=0)
def _main():
    """Detect VOC objects in a profile's images and save annotated PNGs.

    Parses ``--login.user`` / ``--login.password`` from the command line,
    fetches the image paths for the user, runs the SSD model over them and,
    for every image with at least one detection of confidence >= 0.6, writes
    ``<index>.png`` with all such detections outlined and labelled. Images
    without a confident detection produce no file.
    """
    parser = argparse.ArgumentParser(description='instagram post describer')
    parser.add_argument('--login.user',
                        dest='username',
                        help='instagram username')
    parser.add_argument('--login.password',
                        dest='password',
                        help='instagram password')
    # parser.add_argument('--post_id', dest='post_id',
    #                     help='post id (click post then see after the url "https://www.instagram.com/p/")')

    args = parser.parse_args()
    # The password option is accepted but nothing in this function needs it.
    username = args.username
    image_paths = _profile_images(username)
    images, inputs = _load_images(image_paths)

    voc_classes = [
        'Aeroplane', 'Bicycle', 'Bird', 'Boat', 'Bottle', 'Bus', 'Car', 'Cat',
        'Chair', 'Cow', 'Diningtable', 'Dog', 'Horse', 'Motorbike', 'Person',
        'Pottedplant', 'Sheep', 'Sofa', 'Train', 'Tvmonitor'
    ]
    # One extra class on top of the 20 named ones; label ids are 1-based
    # into voc_classes below.
    NUM_CLASSES = len(voc_classes) + 1
    bbox_util = BBoxUtility(NUM_CLASSES)
    model = _load_model(NUM_CLASSES)
    model.summary()

    preds = model.predict(inputs, batch_size=1, verbose=1)
    results = bbox_util.detection_out(preds)

    # One colour per class id; the palette does not depend on the image.
    colors = plt.cm.hsv(np.linspace(0, 1, NUM_CLASSES)).tolist()

    for i, img in enumerate(images):
        # An image without detections may come back as an empty result.
        if len(results[i]) == 0:
            continue
        detections = np.asarray(results[i])

        # Columns: label, confidence, xmin, ymin, xmax, ymax.
        confident = detections[detections[:, 1] >= 0.6]
        if len(confident) == 0:
            continue

        # Draw every box on the same canvas so the saved file shows all
        # detections, not just the last one.
        canvas = PIL.Image.fromarray(np.uint8(img))
        draw = ImageDraw.Draw(canvas)
        for label, score, rel_xmin, rel_ymin, rel_xmax, rel_ymax in confident[:, :6]:
            xmin = int(round(rel_xmin * img.shape[1]))
            ymin = int(round(rel_ymin * img.shape[0]))
            xmax = int(round(rel_xmax * img.shape[1]))
            ymax = int(round(rel_ymax * img.shape[0]))
            label = int(label)
            display_txt = '{:0.2f}, {}'.format(score, voc_classes[label - 1])
            color = tuple(int(255 * channel) for channel in colors[label][:3])
            draw.rectangle(((xmin, ymin), (xmax, ymax)), outline=color)
            draw.text((xmin, ymin), display_txt)
        canvas.save('{}.png'.format(i))
Exemple #8
0
from ssd.ssd_training import MultiboxLoss
from ssd.ssd_utils import BBoxUtility

plt.rcParams['figure.figsize'] = (8, 8)
plt.rcParams['image.interpolation'] = 'nearest'

np.set_printoptions(suppress=True)

# Training configuration. The batch size scales with the number of GPUs.
GPU_COUNT = 4
NUM_CLASSES = 6  # previously 4
INPUT_SHAPE = (300, 300, 3)
EPOCHS = 200
BATCH_SIZE = 4 * GPU_COUNT

# NOTE: pickle must only be used on trusted, locally produced files.
with open('prior_boxes_ssd300.pkl', 'rb') as _prior_file:
    priors = pickle.load(_prior_file)
bbox_util = BBoxUtility(NUM_CLASSES, priors)

with open('datasets.pkl', 'rb') as _gt_file:
    gt = pickle.load(_gt_file)

# Sorted keys make the 80/20 train/validation split reproducible.
keys = sorted(gt.keys())
num_train = int(round(0.8 * len(keys)))
train_keys = keys[:num_train]
val_keys = keys[num_train:]
num_val = len(val_keys)


class Generator(object):
    def __init__(self,
                 gt,
                 bbox_util,
                 batch_size,
                 path_prefix,
from ssd.net import *

# The first command-line argument selects the network size; '512' picks the
# 512x512 variant, anything else the 300x300 one.
model_type = sys.argv[1]
data_file, num_classes = 'train/IITB.pkl', 5
path_prefix = 'train/data/'
if model_type == '512':
    img_height, img_width = 512, 512
    prior_box_configs = prior_box_configs_512
else:
    img_height, img_width = 300, 300
    prior_box_configs = prior_box_configs_300
variances = [0.1, 0.1, 0.2, 0.2]

prior_boxes = get_prior_boxes(img_width, img_height, prior_box_configs,
                              variances)
#pickle.dump(prior_boxes, open('default_prior_boxes_{}x{}.pkl'.format(img_height, img_width), 'wb'))
bbox_util = BBoxUtility(num_classes, prior_boxes, use_tf=True)

# NOTE: pickle must only be used on trusted, locally produced files.
with open(data_file, 'rb') as _data_file:
    data = pickle.load(_data_file)

# A dict view cannot be sliced on Python 3, so materialise the keys first;
# sorting them also makes the 80/20 split reproducible.
keys = sorted(data.keys())
num_train = int(round(0.8 * len(keys)))
train_keys, val_keys = keys[:num_train], keys[num_train:]

data_gen = Generator(data, bbox_util, 1, path_prefix, train_keys, val_keys,
                     (img_height, img_width))

# Build only the network that was asked for.
if model_type == '512':
    model = SSD512((img_height, img_width, 3), num_classes=num_classes)
else:
    model = SSD300((img_height, img_width, 3), num_classes=num_classes)
model.compile(optimizer=Adam(lr=3e-4),
              loss=MultiboxLoss(num_classes, neg_pos_ratio=3.0).compute_loss)
model.summary()
class DetectCars(object):
    """Run a trained SSD model on images and report boxes that contain cars.

    Arguments:
        class_names: A list of strings, each containing the name of a class.
                     The first name should be that of the background class,
                     which is not used.
        model:       An SSD model. It should already be trained.
        model_input_shape: The shape that the model expects for its input,
                     as a tuple, for example (300, 300, 3).
        image_shape: The shape of the input image, as a tuple, for example
                     (720, 1280, 3).
    """

    # Maximum number of detection windows remembered across frames.
    _MAX_HIT_WINDOWS = 10

    def __init__(self, class_names, model, model_input_shape, image_shape):
        self.class_names = class_names
        self.num_classes = len(class_names)
        self.model = model
        self.bbox_util = BBoxUtility(self.num_classes)
        self.model_input_shape = model_input_shape
        # NOTE(review): index 0 is stored as the width and index 1 as the
        # height, which is the opposite of the (720, 1280, 3) example in the
        # class docstring. Left unchanged because callers may depend on it;
        # confirm which order callers actually pass.
        self.image_width = image_shape[0]
        self.image_height = image_shape[1]
        self.hit_window_list = []

    def search_cars(self, rgb_image, conf_thresh=0.6):
        """Return pixel boxes of cars detected with sufficient confidence.

        # Arguments
        rgb_image: Image on which the SSD model searches for cars.
        conf_thresh: Confidence threshold; detections below it are dropped.

        # Returns
        A list of ``[xmin, ymin, xmax, ymax]`` integer boxes, scaled by the
        stored image width and height. Empty when nothing was detected.
        """
        # Resize the input image to the size the model expects.
        model_im_size = (self.model_input_shape[0], self.model_input_shape[1])
        model_image = cv2.resize(rgb_image, model_im_size)

        # Predict on a batch holding just this one image.
        batch = np.array([image.img_to_array(model_image)])
        predictions = self.model.predict(preprocess_input(batch))
        results = self.bbox_util.detection_out(predictions)

        boxes_list = []
        if len(results) == 0 or len(results[0]) == 0:
            return boxes_list

        # Only one frame was predicted.
        # Columns: label, confidence, xmin, ymin, xmax, ymax.
        detections = np.asarray(results[0])
        confident = detections[detections[:, 1] >= conf_thresh]

        for label_id, _conf, xmin, ymin, xmax, ymax in confident[:, :6]:
            if self.class_names[int(label_id)] != 'car':
                continue
            boxes_list.append([
                int(round(xmin * self.image_width)),
                int(round(ymin * self.image_height)),
                int(round(xmax * self.image_width)),
                int(round(ymax * self.image_height)),
            ])
        return boxes_list

    ############################### Augmented Display ###############################

    def add_heat(self, orignal_image, hit_windows):
        '''
        Build a heatmap counting, per pixel, how many hit windows cover it.

        The heatmap has the height and width of ``orignal_image`` and each
        window is an ``(xmin, ymin, xmax, ymax)`` sequence.
        '''
        # np.float was removed from NumPy; float64 is what it aliased.
        heatmap = np.zeros_like(orignal_image[:, :, 0], dtype=np.float64)
        for xmin, ymin, xmax, ymax in hit_windows:
            # Add 1 for all pixels inside the window (max edges exclusive).
            heatmap[int(ymin):int(ymax), int(xmin):int(xmax)] += 1
        return heatmap

    def filter_heatmap(self, heatmap, threshold):
        '''
        Threshold the heatmap to suppress weakly supported detections.

        Pixels strictly above ``threshold`` become 255, all others 0.
        '''
        return np.where(heatmap > threshold, 255.0, 0.0)

    def label_bboxes(self, filtered_heatmap):
        '''
        Return one bounding box per connected region of the filtered heatmap.

        Each box is ``((xmin, ymin), (xmax, ymax))`` with plain Python ints.
        '''
        labeled, num_cars = label(filtered_heatmap)
        bbox_list = []
        for car_number in range(1, num_cars + 1):
            # Row (y) and column (x) indices of the pixels in this region.
            nonzeroy, nonzerox = (labeled == car_number).nonzero()
            # Plain ints keep the points usable as drawing coordinates.
            bbox = ((int(np.min(nonzerox)), int(np.min(nonzeroy))),
                    (int(np.max(nonzerox)), int(np.max(nonzeroy))))
            bbox_list.append(bbox)
        return bbox_list

    def augmented_display(self, orignal_image, hit_windows):
        '''
        Draw the heatmap-derived car boxes on a copy of the image.

        Returns the annotated copy, the raw heatmap and the thresholded
        heatmap.
        '''
        heatmap = self.add_heat(orignal_image, hit_windows)
        # A pixel must be covered by more than one window to count.
        filtered_heatmap = self.filter_heatmap(heatmap, threshold=1)
        bbox_list = self.label_bboxes(filtered_heatmap)

        draw_img = np.copy(orignal_image)
        for bbox in bbox_list:
            cv2.rectangle(draw_img, bbox[0], bbox[1], (0, 255, 0), 3)
        return draw_img, heatmap, filtered_heatmap

    def image_prcess(self, rgb_image):
        '''
        Process a single image and return it with detected cars outlined.

        New detections are appended to a bounded history of recent windows;
        the boxes are drawn from that history. The input image is returned
        unchanged while the history is empty.
        '''
        for xmin, ymin, xmax, ymax in self.search_cars(rgb_image):
            if len(self.hit_window_list) >= self._MAX_HIT_WINDOWS:
                # Drop the oldest window to make room for the new one.
                self.hit_window_list = self.hit_window_list[1:]
            self.hit_window_list.append([xmin, ymin, xmax, ymax])

        if len(self.hit_window_list) > 0:
            rgb_image, _heatmap, _filtered_heatmap = self.augmented_display(
                rgb_image, self.hit_window_list)
        return rgb_image
classes = [
    'Aeroplane', 'Bicycle', 'Bird', 'Boat', 'Bottle', 'Bus', 'Car', 'Cat',
    'Chair', 'Cow', 'DiningTable', 'Dog', 'Horse', 'Motorbike', 'Person',
    'PottedPlant', 'Sheep', 'Sofa', 'Train', 'TV/Monitor', 'Background'
]
num_classes = len(classes)

# Build only the network that was asked for.
if model_type == '512':
    img_height, img_width = 512, 512
    model = SSD512((img_height, img_width, 3), num_classes=num_classes)
else:
    img_height, img_width = 300, 300
    model = SSD300((img_height, img_width, 3), num_classes=num_classes)

model.summary()
model.load_weights('weights/tf_VGG_VOC0712Plus_SSD_{}x{}.hdf5'.format(
    img_height, img_width))
bbox_util = BBoxUtility(num_classes, session=K.get_session(), use_tf=True)

# `images` keeps the originals with the channel order reversed for display;
# `inputs` holds the resized, mean-subtracted arrays fed to the network.
inputs = []
images = []
image_names = os.listdir(base_path)
for image_name in image_names:
    image = cv2.imread(base_path + image_name)
    if image is None:
        # cv2.imread signals an unreadable file by returning None.
        raise IOError('could not read image: {}'.format(base_path + image_name))
    images.append(image[:, :, ::-1])
    image = image.astype(np.float32)
    # cv2.resize takes the target size as (width, height).
    image = cv2.resize(image, (img_width, img_height))
    # Per-channel mean subtraction.
    image[:, :, 0] -= 104.0
    image[:, :, 1] -= 117.0
    image[:, :, 2] -= 124.0
    inputs.append(image)
inputs = np.asarray(inputs)
# Print NumPy floats in fixed-point rather than scientific notation.
np.set_printoptions(suppress=True)

# Create a TensorFlow session from a config with a GPU device count of zero
# and register it via set_session before the model is built.
# NOTE(review): presumably meant to force CPU-only inference — confirm.
config = tf.ConfigProto(device_count={'GPU': 0})
sess = tf.Session(config=config)
set_session(sess)

# Labels the detector was trained on (custom labels despite the "voc" name).
voc_classes = [
    "honoka", "eli", "kotori", "umi", "rin", "maki", "hanayo", "nico", "nozomi"
]

# One more class than there are labels.
# NOTE(review): presumably the extra class is the background — confirm.
NUM_CLASSES = len(voc_classes) + 1
input_shape = (300, 300, 3)
model = SSD300(input_shape, num_classes=NUM_CLASSES)
weight_file = './checkpoints/weights.31-1.40.hdf5'
# by_name=True is passed to load_weights.
# NOTE(review): presumably weights are matched to layers by name — confirm.
model.load_weights(weight_file, by_name=True)
bbox_util = BBoxUtility(NUM_CLASSES)
# sess.graph.finalize()


def _adjust_inference(result, img_shape, threshold=0.7):
    # Parse the outputs.
    det_label = result[:, 0]
    det_conf = result[:, 1]
    det_xmin = result[:, 2]
    det_ymin = result[:, 3]
    det_xmax = result[:, 4]
    det_ymax = result[:, 5]

    # Get detections with confidence higher than 0.6.
    top_indices = [i for i, conf in enumerate(det_conf) if conf >= threshold]