# Code example #1 (score: 0)
# File: demo.py — Project: lihaojia24/pytorch-dt
import torch
import torch.nn.functional as F
import torchvision.transforms as transforms

import os
import sys
# os.chdir('/home/hector/project/proj-pytorch/pytorch-dt/')
# sys.path.insert(0, '/home/hector/project/proj-pytorch/pytorch-dt/')


from PIL import Image, ImageDraw
from torch.autograd import Variable
from models.ssd import SSD300, SSDBoxCoder

print('Loading model..')
net = SSD300(num_classes=2)

# Restore trained weights; the checkpoint dict stores the state under 'net'.
checkpoint = torch.load('./example/ssd+/checkpoint/ckpt.pth')
net.load_state_dict(checkpoint['net'])
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# cudnn.benchmark = True
net.to(device)

# Inference mode: disables dropout and uses running batch-norm statistics.
net.eval()

print('Loading image..')
# BUG FIX: PIL does not expand '~' in paths, so the original literal path
# could never be opened; expand it to the user's home directory explicitly.
img_org = Image.open(os.path.expanduser(
    '~/project/data/caltech/JPEGImages/set04_V002_I00059_usatrain.jpg'))
ow = oh = 300  # SSD300 expects a 300x300 input
img = img_org.resize((ow, oh))

print('Predicting..')
# Code example #2 (score: 0)
from utils.boxes import create_prior_boxes
from utils.boxes import calculate_intersection_over_union
from utils.boxes import denormalize_box
from models.ssd import SSD300
from metrics import compute_average_precision
# from metrics import compute_average_precision_2
from metrics import compute_precision_and_recall
# from utils.visualizer import draw_image_boxes
from utils.datasets import get_arg_to_class
import cv2

# Evaluation configuration for PASCAL VOC 2007.
dataset_name = 'VOC2007'
image_prefix = '../datasets/VOCdevkit/VOC2007/JPEGImages/'

# Build the detector and the fixed set of SSD prior (anchor) boxes.
weights_path = '../trained_models/SSD300_weights.hdf5'
model = SSD300(weights_path=weights_path)
prior_boxes = create_prior_boxes()
input_shape = model.input_shape[1:3]  # (height, width) expected by the network
class_threshold = .1       # minimum class score to keep a detection
iou_nms_threshold = .45    # IoU threshold for non-maximum suppression
iou_threshold = .5         # IoU threshold for matching detections to ground truth
num_classes = 21           # 20 VOC classes + background

# Per-class average precision; class 0 is background, so start at 1.
# NOTE(review): `get_class_names` is not imported in this snippet —
# presumably it lives in utils.datasets like `get_arg_to_class`; verify.
average_precisions = []
for ground_truth_class_arg in range(1, num_classes):
    labels = []
    scores = []
    class_names = get_class_names(dataset_name)
    selected_classes = [class_names[0]] + [class_names[ground_truth_class_arg]]
    num_ground_truth_boxes = 0
    class_decoder = get_arg_to_class(class_names)
    # NOTE(review): the loop body appears truncated here — the code that
    # fills `labels`/`scores` and appends to `average_precisions` is missing
    # from this excerpt.
# Code example #3 (score: 0)
def _main(args):
    """Run SSD300 detection over a directory of frames, log tracked object
    positions, optionally draw and save annotated images, and finally
    compute object trajectories.

    Args:
        args: parsed CLI namespace providing `input_path`, `output_path`,
            `oxts`, `time_stamps` and `model_path` attributes.
    """
    start_time = timer()
    input_path = os.path.expanduser(args.input_path)
    output_path = os.path.expanduser(args.output_path)

    if not os.path.exists(output_path):
        print('Creating output path {}'.format(output_path))
        os.mkdir(output_path)

    logging.basicConfig(filename=output_path + "/tracking.log", level=logging.DEBUG)

    # Parse car positions and angles (GPS/IMU "oxts" data); relative paths
    # are resolved against the input directory.
    print("Parsing timestamps and oxts files...")
    if args.oxts.startswith('..'):
        parse_oxts(input_path + "/" + args.oxts, input_path + "/" + args.time_stamps)
    else:
        parse_oxts(args.oxts, args.time_stamps)
    print("Done. Data acquired.")

    dataset_name = 'VOC2007'
    NUM_CLASSES = 21  # 20 VOC classes + background

    weights_filename = args.model_path
    model = SSD300(num_classes=NUM_CLASSES)
    prior_boxes = create_prior_boxes(model)
    model.load_weights(weights_filename)

    # --- drawing setup -----------------------------------------------------
    # Generate one distinct color per class by sweeping hue in HSV space.
    hsv_tuples = [(x / NUM_CLASSES, 1., 1.)
                  for x in range(NUM_CLASSES)]
    colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
    colors = list(
        map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
            colors))
    random.seed(10101)  # Fixed seed for consistent colors across runs.
    random.shuffle(colors)  # Shuffle colors to decorrelate adjacent classes.
    random.seed(None)  # Reset seed to default.

    class_names = get_class_names(dataset_name)
    box_scale_factors = [.1, .1, .2, .2]  # maybe adjust to our image size
    background_index = 0
    lower_probability_threshold = .1
    iou_threshold = .2
    num_classes = len(class_names)  # should be equal to NUM_CLASSES
    arg_to_class = dict(zip(list(range(num_classes)), class_names))

    frame_idx = 0

    for image_file in os.listdir(input_path):
        # Skip anything that is not a recognizable image (or is a directory).
        try:
            image_type = imghdr.what(os.path.join(input_path, image_file))
            if not image_type:
                print("frame dropped")
                continue
        except IsADirectoryError:
            print("frame dropped")
            continue
        image = Image.open(os.path.join(input_path, image_file))
        image_data = np.array(image)

        selected_boxes = predict(model, image_data, prior_boxes,
                                image_data.shape[0:2], num_classes,
                                lower_probability_threshold,
                                iou_threshold,
                                background_index,
                                box_scale_factors)
        if selected_boxes is not None:
            # Columns: x_min, y_min, x_max, y_max, then per-class scores.
            x_mins = selected_boxes[:, 0]
            y_mins = selected_boxes[:, 1]
            x_maxs = selected_boxes[:, 2]
            y_maxs = selected_boxes[:, 3]
            classes = selected_boxes[:, 4:]
            num_boxes = len(selected_boxes)
        else:
            num_boxes = 0
            print("frame dropped, no boxes")

        print('Found {} boxes for {}'.format(num_boxes, image_file))

        font = ImageFont.truetype(
            font='font/FiraMono-Medium.otf',
            size=11)  # np.floor(3e-2 * image.size[1] + 0.5).astype('int32')
        thickness = (image_data.shape[0] + image_data.shape[1]) // 300

        logging.info("Img: " + str(image_file))

        boxes_data = []

        for i in range(num_boxes):
            xmin = int(x_mins[i])
            ymin = int(y_mins[i])
            xmax = int(x_maxs[i])
            ymax = int(y_maxs[i])
            box_class_scores = classes[i]
            label_class = np.argmax(box_class_scores)
            score = box_class_scores[label_class]
            predicted_class = arg_to_class[label_class]

            box = [ymin, xmin, ymax, xmax]
            box = [max(0, v) for v in box]  # sometimes it's negative.

            # --- log positions ------------------------------------------
            obj_coord = np.array([])

            # Track only the classes of interest with a reasonable score.
            if predicted_class in ["person", "bicycle", "car", "motorbike", "bus", "train", "truck"] and score > 0.2:
                # BUG FIX: was ["bus", "bruck"] — the typo meant trucks were
                # never remapped to the generic "car" vehicle class.
                if predicted_class in ["bus", "truck"]:  # vehicle
                    predicted_class = "car"
                obj_coord = computeCoordinates(box, frame_idx)
                if obj_coord is not None:
                    hist = histogram(image_data, box)
                    # Create the detection record and store it.
                    boxes_data.append({
                        'predicted_class': predicted_class,
                        'score': float(score),
                        'coord': obj_coord,
                        'hist': hist
                        })
                    logging.info(predicted_class + " :" + str(obj_coord) + " | " + str(np.linalg.norm(obj_coord)))

            # --- end log positions --------------------------------------
            if saveImages:
                # NOTE(review): for untracked classes obj_coord stays an
                # empty ndarray (never None), so this branch labels them with
                # empty coordinates — confirm this is intended.
                if obj_coord is not None:
                    label = '{} {:.2f} {} {:.2f}'.format(predicted_class, score, str(obj_coord), np.linalg.norm(obj_coord))
                else:
                    label = '{} {:.2f}'.format(predicted_class, score)
                draw = ImageDraw.Draw(image)
                label_size = draw.textsize(label, font)

                top, left, bottom, right = box
                top = max(0, np.floor(top + 0.5).astype('int32'))
                left = max(0, np.floor(left + 0.5).astype('int32'))
                bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
                right = min(image.size[0], np.floor(right + 0.5).astype('int32'))
                print(label, (left, top), (right, bottom))

                # Place the label above the box when there is room,
                # otherwise just inside the top edge.
                if top - label_size[1] >= 0:
                    text_origin = np.array([left, top - label_size[1]])
                else:
                    text_origin = np.array([left, top + 1])

                # Draw a `thickness`-pixel border by nesting 1-px rectangles.
                # (Loop variable renamed: the original reused `i`, shadowing
                # the box index above.)
                for t in range(thickness):
                    draw.rectangle(
                        [left + t, top + t, right - t, bottom - t],
                        outline=colors[label_class])
                draw.rectangle(
                    [tuple(text_origin), tuple(text_origin + label_size)],
                    fill=colors[label_class])
                draw.text(text_origin, label, fill=(0, 0, 0), font=font)
                del draw

        frame_idx += 1
        global data_frames
        data_frames.append(boxes_data)
        if saveImages:
            image.save(os.path.join(output_path, image_file), quality=80)

    now = timer()
    start_trj_time = timer()
    print("Time elapsed CNN: " + str(now - start_time) + " seconds")
    print("Calculating trajectories...")
    calculate_trajectories()

    now = timer()
    print("Done. Time elapsed: " + str(now - start_trj_time) + " seconds\n\n")
    print("Total time elapsed: " + str(now - start_time) + " seconds")
# Code example #4 (score: 0)
            image_array = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            selected_boxes = predict(model, image_array, prior_boxes,
                                    frame.shape[0:2], self.num_classes,
                                    self.lower_probability_threshold,
                                    self.iou_threshold,
                                    self.background_index,
                                    self.box_scale_factors)
            if selected_boxes is None:
                continue
            draw_video_boxes(selected_boxes, frame, self.arg_to_class,
                                        self.colors, self.font)

            cv2.imshow('webcam', frame)
            if cv2.waitKey(1)&0xFF == ord('q'):
                break
        camera.release()
        cv2.destroyAllWindows()

if __name__ == "__main__":
    # Demo entry point: build SSD300 for the 21 VOC classes (20 + background),
    # load trained weights, then stream video frames through the detector.
    from models.ssd import SSD300
    from utils.boxes import create_prior_boxes
    num_classes = 21
    dataset_name = 'VOC2007'
    weights_filename = '../trained_models/weights_SSD300.hdf5'
    model = SSD300(num_classes=num_classes)
    prior_boxes = create_prior_boxes(model)
    model.load_weights(weights_filename)
    video = VideoTest(prior_boxes, dataset_name)
    video.start_video(model)

# Code example #5 (score: 0)
import pickle
from models.ssd import SSD300

# Port TensorFlow weights (pickled as a per-layer list) into the Keras model.
# SECURITY NOTE: pickle.load can execute arbitrary code — only load weight
# files from a trusted source.
# FIX: use a context manager so the file handle is closed deterministically
# instead of leaking until garbage collection.
with open('tf_w.pkl', 'rb') as weights_file:
    tf_weights = pickle.load(weights_file)

model = SSD300()
# tf_weights is indexed by layer position in the model.
for layer_arg, layer in enumerate(model.layers):
    print(layer_arg)
    layer.set_weights(tf_weights[layer_arg])

# Code example #6 (score: 0)
# File: model_factory.py — Project: catree/mcv-m5
    def make_one_net_model(self, cf, in_shape, loss, metrics, optimizer):
        """Build, optionally warm-start, compile and wrap a single Keras model.

        Selects the architecture named by ``cf.model_name``, builds it for
        ``in_shape`` and ``cf.dataset.n_classes``, loads pretrained weights
        when ``cf.load_pretrained`` is set, compiles it with the given
        loss/metrics/optimizer, and returns it wrapped in ``One_Net_Model``.

        Raises:
            ValueError: if ``cf.model_name`` is not a known architecture.
        """
        # Create the *Keras* model
        if cf.model_name == 'fcn8':
            model = build_fcn8(in_shape,
                               cf.dataset.n_classes,
                               cf.weight_decay,
                               freeze_layers_from=cf.freeze_layers_from,
                               load_pretrained=cf.load_imageNet)
        elif cf.model_name == 'unet':
            model = build_unet(in_shape,
                               cf.dataset.n_classes,
                               cf.weight_decay,
                               freeze_layers_from=cf.freeze_layers_from,
                               path_weights=None)
        elif cf.model_name == 'segnet_basic':
            model = build_segnet(in_shape,
                                 cf.dataset.n_classes,
                                 cf.weight_decay,
                                 freeze_layers_from=cf.freeze_layers_from,
                                 path_weights=None,
                                 basic=True)
        elif cf.model_name == 'segnet_vgg':
            model = build_segnet(in_shape,
                                 cf.dataset.n_classes,
                                 cf.weight_decay,
                                 freeze_layers_from=cf.freeze_layers_from,
                                 path_weights=None,
                                 basic=False)
        elif cf.model_name == 'resnetFCN':
            model = build_resnetFCN(in_shape,
                                    cf.dataset.n_classes,
                                    cf.weight_decay,
                                    freeze_layers_from=cf.freeze_layers_from,
                                    path_weights=None)
        elif cf.model_name == 'densenetFCN':
            model = build_densenetFCN(in_shape,
                                      cf.dataset.n_classes,
                                      cf.weight_decay,
                                      freeze_layers_from=cf.freeze_layers_from,
                                      path_weights=None)
        elif cf.model_name == 'lenet':
            model = build_lenet(in_shape, cf.dataset.n_classes,
                                cf.weight_decay)
        elif cf.model_name == 'alexNet':
            model = build_alexNet(in_shape, cf.dataset.n_classes,
                                  cf.weight_decay)
        elif cf.model_name == 'vgg16':
            model = build_vgg(in_shape,
                              cf.dataset.n_classes,
                              16,
                              cf.weight_decay,
                              load_pretrained=cf.load_imageNet,
                              freeze_layers_from=cf.freeze_layers_from)
        elif cf.model_name == 'vgg19':
            model = build_vgg(in_shape,
                              cf.dataset.n_classes,
                              19,
                              cf.weight_decay,
                              load_pretrained=cf.load_imageNet,
                              freeze_layers_from=cf.freeze_layers_from)
        elif cf.model_name == 'resnet50':
            model = build_resnet50(in_shape,
                                   cf.dataset.n_classes,
                                   cf.weight_decay,
                                   load_pretrained=cf.load_imageNet,
                                   freeze_layers_from=cf.freeze_layers_from)
        elif cf.model_name == 'InceptionV3':
            model = build_inceptionV3(in_shape,
                                      cf.dataset.n_classes,
                                      cf.weight_decay,
                                      load_pretrained=cf.load_imageNet,
                                      freeze_layers_from=cf.freeze_layers_from)
        elif cf.model_name == 'yolo':
            model = build_yolo(in_shape,
                               cf.dataset.n_classes,
                               cf.dataset.n_priors,
                               load_pretrained=cf.load_imageNet,
                               freeze_layers_from=cf.freeze_layers_from,
                               tiny=False)
        elif cf.model_name == 'tiny-yolo':
            model = build_yolo(in_shape,
                               cf.dataset.n_classes,
                               cf.dataset.n_priors,
                               load_pretrained=cf.load_imageNet,
                               freeze_layers_from=cf.freeze_layers_from,
                               tiny=True)
        elif cf.model_name == 'own':
            model = build_own(in_shape, cf.dataset.n_classes)
        elif cf.model_name == 'ssd512':
            # NOTE(review): the 'ssd512' name actually builds an SSD300 —
            # confirm this naming/architecture mismatch is intentional.
            model = SSD300(in_shape, cf.dataset.n_classes)
        elif cf.model_name == 'segnet':
            model = build_segnet(in_shape, cf.dataset.n_classes)
        elif cf.model_name == 'VGGSegnet':
            model = VGGSegnet(in_shape, cf.dataset.n_classes)

        else:
            raise ValueError('Unknown model')

        # Load pretrained weights
        if cf.load_pretrained:
            print('   loading model weights from: ' + cf.weights_file + '...')
            model.load_weights(cf.weights_file, by_name=True)

        # Compile model
        # SSD uses its own multibox loss object and no extra metrics.
        if cf.model_name == 'ssd512':
            model.compile(loss=loss.compute_loss, optimizer=optimizer)
        else:
            model.compile(loss=loss, metrics=metrics, optimizer=optimizer)
        # Show model structure
        if cf.show_model:
            model.summary()
            plot_model(model, to_file=os.path.join(cf.savepath, 'model.png'))

        # Output the model
        print('   Model: ' + cf.model_name)
        # model is a keras model, Model is a class wrapper so that we can have
        # other models (like GANs) made of a pair of keras models, with their
        # own ways to train, test and predict
        return One_Net_Model(model, cf, optimizer)
# Code example #7 (score: 0)
                            origin_stack.pop(0)
                            origin_stack.append(curl[8:120, 30:142, :])
                            clip = np.array(origin_stack)
                            clip = np.expand_dims(clip, axis=0)
                            clip = preprocessing(clip)
                            c3d_result = c3d.predict(clip)
                            if max(c3d_result[0]) >= conf_thresh:
                                label = np.argmax(c3d_result[0])
                                action_name = action_class[label]
                                cv2.putText(
                                    orig_image,
                                    action_name + '%.2f' % max(c3d_result[0]),
                                    (xmin + 10, ymin + 10),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255),
                                    1)
        cv2.imshow("SSD result", orig_image)
        if cv2.waitKey(5) & 0xFF == ord('q'):
            break


if __name__ == '__main__':
    # Joint SSD (detection) + C3D (action recognition) camera demo.
    action_class = ['standing', 'sitting', 'walking']
    clip_length = 16  # frames per C3D clip
    input_shape = (300, 300, 3)  # SSD300 input resolution
    ssd_model = SSD(input_shape, num_classes=21)
    ssd_model.load_weights('weights_SSD300.hdf5')
    c3d_input_shape = (112, 112, 16, 3)
    c3d = c3d_model(c3d_input_shape, nb_classes=3)
    # NOTE(review): empty weights path — fill in a real C3D weights file
    # before running, or load_weights will fail.
    c3d.load_weights('')
    run_camera(input_shape, ssd_model, action_class, clip_length, c3d)