Example #1
def init_net():
    net = dn.load_net(b"/app/darknet/cfg/yolov2-tiny.cfg", b"/app/darknet/yolov2-tiny.weights", 0)
    meta = dn.load_meta(b"/app/darknet/cfg/coco.data")
    return net, meta
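A minimal usage sketch for the helper above. The sample image path, the thresh value, and the dn.detect() call shape (path passed as bytes, results as (name, probability, (cx, cy, w, h)) tuples) follow the stock darknet Python binding and are assumptions here, not part of the original example.

# Hedged sketch: load the model once, then run detection on a placeholder image.
net, meta = init_net()
results = dn.detect(net, meta, b"/app/darknet/data/dog.jpg", thresh=0.5)
for name, prob, (cx, cy, w, h) in results:
    # name is a bytes object in the Python 3 binding; the box is centre-based.
    print(name, round(prob, 3), (cx, cy, w, h))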
Example #2
def vehicleCount(root,
                 in_file,
                 output_dir,
                 lineState='Horizontal',
                 thresh=0.5,
                 roiThick=7,
                 offset=0):

    try:
        out_file = in_file.split('/')[-1].split('.')[0] + '_out'

        vehicle_threshold = thresh
        roi = roiThick

        vehicle_weights = 'object_detector/data/vehicle-detector/yolov2.weights'
        vehicle_netcfg = 'object_detector/data/vehicle-detector/yolov2.cfg'
        vehicle_dataset = 'object_detector/data/vehicle-detector/coco.data'

        vehicle_net = dn.load_net(vehicle_netcfg.encode('utf-8'),
                                  vehicle_weights.encode('utf-8'), 0)
        vehicle_meta = dn.load_meta(vehicle_dataset.encode('utf-8'))

        writer = None
        cap = cv2.VideoCapture(in_file)
        cnt = 0
        car_count = 0
        while cap.isOpened():

            ret, frame = cap.read()

            if not ret:
                root.statusStrVar.set(
                    'Done...Video saved at {}'.format(output_dir + '/' +
                                                      out_file + '.mp4'))
                break

            WH = frame.shape[:2]
            img = nparray_to_image(frame)
            R, _ = detect(vehicle_net, vehicle_meta, img, vehicle_threshold)

            if lineState == "Horizontal":
                linel = (0, WH[0] - WH[0] // 4 - offset)
                liner = (WH[1], WH[0] - WH[0] // 4 - offset)
            elif lineState == "Vertical":
                lineu = (WH[1] - WH[1] // 4 - offset, 0)
                lined = (WH[1] - WH[1] // 4 - offset, WH[1])

            R = [r for r in R if r[0] in ['car', 'bus']]

            print('Processing frame {}'.format(cnt))
            root.statusStrVar.set('Processing frame {}..'.format(cnt))
            print('\t%d cars found' % len(R))

            if len(R):
                WH = np.array(frame.shape[1::-1], dtype=float)
                for i, r in enumerate(R):
                    name = r[0]
                    cx, cy, w, h = (np.array(r[2]) / np.concatenate(
                        (WH, WH))).tolist()
                    tl = (int((cx - w / 2.) * WH[0]), int(
                        (cy - h / 2.) * WH[1]))
                    br = (int((cx + w / 2.) * WH[0]), int(
                        (cy + h / 2.) * WH[1]))
                    print('\t\t{}th car Coords : ({}, {})'.format(i, tl, br))

                    cv2.rectangle(frame, tl, br, (255, 0, 0),
                                  2)  #crop_region(Iorig,label)

                    if lineState == "Horizontal":
                        cv2.line(frame, linel, liner, (0, 0, 255), 3)

                        if (cy - h / 2) * WH[1] > liner[1] and (
                                cy - h / 2) * WH[1] < liner[1] + roi:
                            car_count += 1
                    elif lineState == "Vertical":
                        cv2.line(frame, lineu, lined, (0, 0, 255), 3)

                        if (cx - w / 2) * WH[0] > lineu[0] and (
                                cx - w / 2) * WH[0] < lineu[0] + roi:
                            car_count += 1

                    cv2.putText(frame, name, tl, cv2.FONT_HERSHEY_SIMPLEX, 1.5,
                                (0, 0, 255), 3, cv2.LINE_AA)

                    cv2.putText(frame, 'Vehicles crossed : ' + str(car_count),
                                (0, int(WH[1])), cv2.FONT_HERSHEY_SIMPLEX, 1,
                                (0, 0, 0), 2, cv2.LINE_AA)
                print('\n')

            if writer is None:
                fourcc = cv2.VideoWriter_fourcc(*'DIVX')
                writer = cv2.VideoWriter(output_dir + '/' + out_file + '.mp4',
                                         fourcc, 30,
                                         (frame.shape[1], frame.shape[0]),
                                         True)

            writer.write(frame)
            del frame
            cnt += 1

    except:
        traceback.print_exc()
        sys.exit(1)

    writer.release()
    cap.release()
    sys.exit(0)
Example #3
file_log_handler = logging.FileHandler("log")
file_log_handler.setLevel(logging.INFO)
file_log_handler.setFormatter(
    logging.Formatter(
        '%(asctime)s - %(levelname)s - %(pathname)s - %(filename)s - %(funcName)s - %(lineno)s - %(message)s'
    ))
logger.addHandler(file_log_handler)
start = time.time()
if len(sys.argv) <= 1:
    net = darknet.load_net("darknet/cfg/yolov3-tiny.cfg".encode('utf-8'),
                           "darknet/yolov3-tiny.weights".encode('utf-8'), 0)
else:
    net = darknet.load_net("darknet/cfg/yolov3.cfg".encode('utf-8'),
                           "darknet/yolov3.weights".encode('utf-8'), 0)

meta = darknet.load_meta("darknet/cfg/coco.data".encode('utf-8'))
end = time.time()
logger.info("Time used for loading model: {}s".format(end - start))


@app.route('/')
def hello_world():
    return 'Hello World!'


@app.route('/predict', methods=['POST'])
def predict():
    start = time.time()
    with open("temp.jpg", 'wb') as image:
        image.write(request.stream.read())
    end = time.time()
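The snippet above is cut off after saving the uploaded image. A hedged continuation sketch, assuming jsonify is imported from flask alongside request and reusing the darknet.detect() call pattern from the stock binding; the response shape is an assumption:

    # Hedged continuation sketch (not part of the original snippet).
    logger.info("Time used for saving image: {}s".format(end - start))
    start = time.time()
    results = darknet.detect(net, meta, "temp.jpg".encode('utf-8'))
    end = time.time()
    logger.info("Time used for detection: {}s".format(end - start))
    return jsonify([{"label": str(name), "confidence": prob,
                     "box": [cx, cy, w, h]}
                    for name, prob, (cx, cy, w, h) in results])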
Example #4
from darknet.python.darknet import detect


if __name__ == '__main__':
	
	input_dir  = sys.argv[1]
	output_dir = input_dir

	ocr_threshold = .4

	ocr_weights = 'data/ocr/ocr-net.weights'
	ocr_netcfg  = 'data/ocr/ocr-net.cfg'
	ocr_dataset = 'data/ocr/ocr-net.data'

	ocr_net  = dn.load_net(ocr_netcfg.encode("utf-8"), ocr_weights.encode("utf-8"), 0)
	ocr_meta = dn.load_meta(ocr_dataset.encode("utf-8"))

	imgs_paths = glob('%s/*lp.png' % output_dir)

	print ('Performing OCR...')

	for i,img_path in enumerate(imgs_paths):

		print ('\tScanning %s' % img_path)

		bname = basename(splitext(img_path)[0])

		R = detect(ocr_net, ocr_meta, img_path.encode('utf-8'), thresh=ocr_threshold)

		if len(R):
Example #5
if __name__ == '__main__':

    try:

        input_dir = sys.argv[1]
        output_dir = sys.argv[2]

        vehicle_threshold = 0.5

        vehicle_weights = 'data/vehicle-detector/yolo-voc.weights'
        vehicle_netcfg = 'data/vehicle-detector/yolo-voc.cfg'
        vehicle_dataset = 'data/vehicle-detector/voc.data'

        vehicle_net = dn.load_net(vehicle_netcfg.encode('utf-8'),
                                  vehicle_weights.encode('utf-8'), 0)
        vehicle_meta = dn.load_meta(vehicle_dataset.encode('utf-8'))

        imgs_paths = image_files_from_folder(input_dir)
        imgs_paths.sort()

        if not isdir(output_dir):
            makedirs(output_dir)

        print('Searching for vehicles using YOLO...')

        for i, img_path in enumerate(imgs_paths):

            print('\tScanning %s' % img_path)

            bname = basename(splitext(img_path)[0])
Example #6
import traceback

from src.draw_BB import draw_bb
from WPOD_src.label import Label
from WPOD_src.utils import image_files_from_folder

import numpy as np

import darknet.python.darknet as dn

# best:FRNet_YOLOv3_50000.weights
FR_weights = 'data/FRD/FRNet_YOLOv3_50000.weights'
FR_netcfg = 'data/FRD/FRNet_YOLOv3.cfg'
FR_data = 'data/FRD/FRNet_YOLOv3.data'

print('FRD Net pre-loading...')
FR_net = dn.load_net(FR_netcfg.encode('utf-8'), FR_weights.encode('utf-8'), 0)
FR_meta = dn.load_meta(FR_data.encode('utf-8'))
threshold = 0.5


def fr_detect(img):
    print('\t\t\tdetecting front and rear using FRD..., Model:', FR_netcfg)
    results, wh = dn.detect(FR_net, FR_meta, img, threshold)

    # the results list is sorted by probability: high prob -> low prob
    if len(results):
        print('\t\t\tFR detection completed')
        FRs = []
        category = []
        for i, result in enumerate(results):
            WH = np.array(img.shape[1::-1], dtype=float)
            cx, cy, w, h = (np.array(result[2]) / np.concatenate(
Example #7
def main():
    cfg = util.parse_cla()  #"./config/config.yaml"

    #Load config
    print("Loading configuration...", end="")
    args = config.get_parser(cfg.config)
    print("Done")

    if args.BOUNDING:

        #Initialise the yolo predictor once to hold model for all predictions
        print("Loading yolo model...", end="")
        net = dn.load_net(bytes(args.YOLO_CFG, 'utf-8'),
                          bytes(args.YOLO_WEIGHTS, 'utf-8'), 0)
        meta = dn.load_meta(bytes(args.YOLO_DATA, 'utf-8'))
        print("Done")

    else:

        #Initialise the segmentation predictor once to hold model for all predictions
        print("Loading segmentation model...", end="")
        predictor = segmentation.SegmentationPredictor(args)
        print("Done")

    #Initialise the pose estimator
    print("Loading pose model...", end="")
    ap = AlphaPose(Args(cfg.config, args.POSE_MODEL))
    print("Done")

    #Initialise classification model
    print("Loading classification model...", end="")
    nn_classifier = pickle.load(open(args.CLASSIFICATION_MODEL, 'rb'))
    print("Done")

    #Load in all the images from the in_dir folder
    if (args.IMAGE_INPUT) & (not args.VIDEO_INPUT):

        images = util.get_images(args.IN_DIR)

    elif (args.VIDEO_INPUT) & (not args.IMAGE_INPUT):

        #If working on a video convert the input video into a series of images
        #Skipping certain frames in order to make it the correct framerate
        #Given the option to reduce framerate as can be slow to process many frames
        print("Converting video to input frames...", end="")
        images = util.video_to_frames(args.IN_DIR, args.VIDEO_FILE,
                                      args.FRAMERATE)
        print("Done")

    else:
        raise ValueError(
            "VIDEO input and IMAGE input can't happen simultaneously")

    #Tags object to store data calculated
    tags = {}

    #Initialise data
    i = 0
    ruck = scrum = maul = lineout = []

    images = list(images)
    len_images = len(images)
    start = datetime.datetime.now()

    #For each image
    for image, image_path in images:

        #Debugging
        util.print_progress_bar(i,
                                len_images,
                                suffix="{}/{}".format(i, len_images))

        #Setup paths
        inpath = os.path.join(args.IN_DIR, image_path)
        outpath = os.path.join(args.OUT_DIR, image_path)

        #Load Image
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        #If using the bounding box YOLO
        if args.BOUNDING:

            #Convert to correct format
            im = dn.array_to_image(image)
            dn.rgbgr_image(im)

            #Get clusters
            dimentions = dn.detect2(net,
                                    meta,
                                    im,
                                    thresh=.5,
                                    hier_thresh=.5,
                                    nms=.45)

            #If clusters detected crop image to cluster parts
            if len(dimentions) > 0:
                dimentions = np.array(dimentions)[:, 2]

                image_clusters = clusters.create_image_clusters(
                    image, dimentions)

            else:
                image_clusters = []

        #If using segmentation mask PSPN
        else:
            mask = predictor.predict(image)

            #Convert mask from PIL format to numpy/opencv
            mask = np.array(mask) * 255
            mask = cv2.cvtColor(mask, cv2.COLOR_GRAY2RGB)
            mask = cv2.bitwise_not(mask)

            #Get clusters
            out = clusters.makemask(image, mask)

            image_clusters, dimentions = clusters.extractclusters(out, image)
            dimentions = [dimentions]

        if len(image_clusters) > 0:
            #Just work with single biggest cluster

            if 0 in image_clusters[0].shape:
                continue

            #Run through alphapose to get json of poses
            json_data = ap.predict(image_clusters[0], image_path)
            json_data = json.loads(json_data)

            #Classify
            try:
                cluster = nn_classifier.predict(json_data)
                max_index = np.argmax(cluster[0])
            except:
                continue

            #Convert back to textual format
            tag = nn_classifier.le.inverse_transform([max_index])

            #Unpack dimentions of cluster
            x, y, w, h = dimentions[0]
            x = int(x)
            y = int(y)
            w = int(w)
            h = int(h)

            #Draw bounding box and add tag annotation to original image
            cv2.rectangle(image, (x, y), (x + w, y + h), args.FONT_COLOR,
                          args.FONT_THICKNESS)
            cv2.putText(image, "{0}: {1:.3f}".format(tag[0],
                                                     cluster[0][max_index]),
                        ((x + w + 10), (y - 10)), args.FONT, args.FONT_SCALE,
                        args.FONT_COLOR, args.FONT_THICKNESS)

            #Draw predictions graph
            if args.PREDITION_GRAPH:

                #Create random sample data for now
                ruck = util.add_to_const_arr(ruck, cluster[0][0],
                                             args.FRAME_HISTORY)
                maul = util.add_to_const_arr(maul, cluster[0][1],
                                             args.FRAME_HISTORY)
                scrum = util.add_to_const_arr(scrum, cluster[0][2],
                                              args.FRAME_HISTORY)
                lineout = util.add_to_const_arr(lineout, cluster[0][3],
                                                args.FRAME_HISTORY)

                #Calculate overlay size based on UI_SCALE parameter
                UI_SCALE = 1 / args.UI_SCALE
                overlay_dim = (int(image.shape[1] / (UI_SCALE * 10)),
                               int(image.shape[0] / (UI_SCALE * 7)))

                #Create graph
                fig = ui.plot_preditions_graph(ruck, maul, scrum, lineout)

                #Convert to numpy array (cv2 format)
                overlay = ui.get_img_from_fig(fig)

                #Resize to correct dimentions
                overlay = cv2.resize(overlay, overlay_dim)

                #Overlay images
                image = overlay_images(overlay, image)

            #Update the output object with image tag
            tags[image_path] = {
                "tag": tag[0],
                "prob": cluster[0][max_index],
                "bbox": {
                    "x": x,
                    "y": y,
                    "w": w,
                    "h": h
                }
            }

        #Save original image with bounding box and associated tag
        plt.imsave(outpath, image)

        i += 1

    end = datetime.datetime.now()
    print("Runtime:", end - start)

    #Print once again to show 100%
    util.print_progress_bar(len_images, len_images)

    #Output the results object
    with open(os.path.join(args.OUT_DIR, "results.json"), "w") as file:
        json.dump(tags, file)

    #If evaluating on the test dataset
    if args.EVALUATE:
        acc = 0
        detections = 0

        #Get the test dataset as json
        data = preprocessor.import_json(args.TEST_DATASET)

        #For each image compare tag to predicted tag
        for image, image_path in images:

            #If both exist, otherwise add 0 as no cluster/poses of note were found on that image
            try:
                if image_path in tags.keys():
                    detections += 1
                if data[image_path]['tag'] == tags[image_path]['tag']:
                    acc += 1
            except:
                continue

        acc = acc / len_images

        print("Detections in test dataset:", detections)
        print("Acuracy on test dataset:", acc)

    #If video input, delete the frames created and make a new video from the output frames
    if args.VIDEO_INPUT:
        print("Processing output video...", end="")

        #Collate filenames to be made into video
        img_file_arr = glob.glob('output/*.png')
        img_file_arr.sort(key=util.sort_filenames)

        #Open files into array
        img_array = []
        for filename in img_file_arr:
            img = cv2.imread(filename)
            height, width, layers = img.shape
            size = (width, height)
            img_array.append(img)

        #Create video output file with parameters
        out = cv2.VideoWriter(os.path.join("output", 'output.mp4'),
                              cv2.VideoWriter_fourcc(*'mp4v'), args.FRAMERATE,
                              size)

        #Write frame array to the output video file
        for i in range(len(img_array)):
            out.write(img_array[i])
        out.release()

        print("Done")
Example #8
from darknet.python.darknet import detect

if __name__ == '__main__':

    input_dir = sys.argv[1]
    output_dir = sys.argv[2]

    vehicle_threshold = .5

    vehicle_weights = 'data/vehicle-detector/yolo-voc.weights'
    vehicle_netcfg = 'data/vehicle-detector/yolo-voc.cfg'
    vehicle_dataset = 'data/vehicle-detector/voc.data'

    vehicle_net = dn.load_net(vehicle_netcfg.encode("utf-8"),
                              vehicle_weights.encode("utf-8"), 0)
    vehicle_meta = dn.load_meta(vehicle_dataset.encode("utf-8"))

    imgs_paths = image_files_from_folder(input_dir)
    imgs_paths.sort()

    if not isdir(output_dir):
        makedirs(output_dir)

    print('Searching for vehicles using YOLO...')

    for i, img_path in enumerate(imgs_paths):

        print('\tScanning %s' % img_path)

        bname = basename(splitext(img_path)[0])
Example #9
context = zmq.Context()
zmq_socket = context.socket(zmq.PUSH)
zmq_socket.connect("tcp://zmq:5559")

sio = socketio.Client()
sio.connect('http://server:5000')

thresh = 0.4

weights = 'darknet/models/bins/backup/yolo-v4-obj-train_last.weights'
netcfg = 'darknet/models/bins/yolo/yolo-v4-obj-test.cfg'
data = 'darknet/models/bins/yolo/annotate/obj.data'

net = dn.load_net(netcfg.encode('utf-8'), weights.encode('utf-8'), 0)
meta = dn.load_meta(data.encode('utf-8'))
camera = 0  # RTSP IP For ip-cam OR 0 For Default video-cam
accept_cls = ['glass', 'can', 'plastic']
pgm = {'glass': 0, 'can': 1, 'plastic': 2}
video_camera = None


def video_stream():
    global video_camera, net, meta, data
    count = 0
    alert_classes = []  # target classes
    alert_classes += accept_cls
    current = datetime.now()
    if video_camera == None:
        video_camera = VideoCamera(camera=camera, alert_classes=alert_classes)
    while True:
Example #10
    try:

        args = parse_args()

        input_dir = args.input_dir
        output_dir = input_dir

        lp_threshold = args.lp_threshold

        lp_weights = b'data/simple-lp-detector/lapi.weights'
        lp_netcfg = b'data/simple-lp-detector/yolov3-lp.cfg'
        lp_dataset = b'data/simple-lp-detector/yolov3-lp.data'

        lp_net = dn.load_net(lp_netcfg, lp_weights, 0)
        lp_meta = dn.load_meta(lp_dataset)

        imgs_paths = image_files_from_folder(input_dir)
        # Filter only cropped car images
        imgs_paths.sort()

        if not isdir(output_dir):
            makedirs(output_dir)

        print('Searching for license plates in cropped cars using YOLO...')

        for i, img_path in enumerate(imgs_paths):

            print('\tScanning %s' % img_path)

            bname = basename(splitext(img_path)[0])
Example #11
from darknet.python.darknet import detect

if __name__ == '__main__':

    input_dir = sys.argv[1]
    output_dir = input_dir

    ocr_threshold = .4

    ocr_weights = 'data/ocr/ocr-net.weights'
    ocr_netcfg = 'data/ocr/ocr-net.cfg'
    ocr_dataset = 'data/ocr/ocr-net.data'

    ocr_net = dn.load_net(ocr_netcfg.encode('utf-8'),
                          ocr_weights.encode('utf-8'), 0)
    ocr_meta = dn.load_meta(ocr_dataset.encode('utf-8'))

    imgs_paths = glob('%s/*lp.png' % output_dir)

    print('Performing OCR...')

    for i, img_path in enumerate(imgs_paths):

        print('\tScanning %s' % img_path)

        bname = basename(splitext(img_path)[0])

        R = detect(ocr_net,
                   ocr_meta,
                   img_path.encode('utf-8'),
                   thresh=ocr_threshold)
Example #12
def load_yolo(dn_net, net_weights, net_meta):
    vehicle_net = dn.load_net(dn_net, net_weights, 0)
    vehicle_meta = dn.load_meta(net_meta)
    return vehicle_net, vehicle_meta
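A hedged usage sketch for the loader above; the byte-string paths are placeholders, and they are passed as bytes because the ctypes binding expects bytes rather than str under Python 3:

# Hedged sketch: placeholder cfg/weights/data paths.
net, meta = load_yolo(b'data/vehicle-detector/yolo-voc.cfg',
                      b'data/vehicle-detector/yolo-voc.weights',
                      b'data/vehicle-detector/voc.data')
R = dn.detect(net, meta, b'sample-car.png', thresh=0.5)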
Example #13
cwd = os.getcwd()
try:
    os.chdir("./darknet")
    os.system("wget 'https://pjreddie.com/media/files/yolov3.weights'")
    os.system("make")
finally:
    os.chdir("..")
# process2 = subprocess.Popen(["wget", "https://pjreddie.com/media/files/yolov3.weights"], cwd = cwd + '/darknet')
# process2.wait()
# process = subprocess.Popen(["make"], cwd = cwd + '/darknet')
# process.wait()

from darknet.python import darknet as dn
import pdb
net = dn.load_net("darknet/cfg/yolov3.cfg".encode("utf-8"), "darknet/yolov3.weights".encode("utf-8"), 0)
meta = dn.load_meta("darknet/cfg/coco.data".encode("utf-8"))

image = cv2.imread("darknet/data/dog.jpg")
result = dn.detect(net, meta, image)
print(result)

app = Flask(__name__)
net = 0
meta = 0

def load_model():
    """Load and return the model"""
    # TODO: INSERT CODE
    # return model
    pass
# The request method is POST (this method enables you to send arbitrary data to the endpoint in the request body, including images, JSON, encoded data, etc.)
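A hedged sketch of the POST endpoint the comment above describes; the route name, reading the raw request body, and the detect() call are illustrative assumptions (the original leaves the handler as a TODO), and it presumes that request is imported from flask and that net and meta have been loaded as earlier in the snippet rather than left at 0:

# Hedged sketch (illustrative only): accept an image in the POST body and run YOLO on it.
@app.route('/detect', methods=['POST'])
def detect_image():
    with open("upload.jpg", "wb") as f:
        f.write(request.data)  # raw bytes sent in the request body
    results = dn.detect(net, meta, b"upload.jpg")
    return {"detections": [str(r[0]) for r in results]}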
Example #14
if __name__ == '__main__':

    try:

        input_dir = sys.argv[1]
        output_dir = input_dir

        ocr_threshold = .4

        ocr_weights = 'data/ocr/ocr-net.weights'
        ocr_netcfg = 'data/ocr/ocr-net.cfg'
        ocr_dataset = 'data/ocr/ocr-net.data'

        ocr_net = dn.load_net(ocr_netcfg.encode('utf-8'),
                              ocr_weights.encode('utf-8'), 0)
        ocr_meta = dn.load_meta(ocr_dataset.encode('utf-8'))

        imgs_paths = sorted(glob('%s/*lp.png' % output_dir))

        print('Performing OCR...')

        # --store the image name temporarily---
        last_handle = ''
        # -------------------------------------

        for i, img_path in enumerate(imgs_paths):
            print('\tScanning %s' % img_path)

            bname = basename(splitext(img_path)[0])

            R, (width, height) = detect(ocr_net,
Example #15
        output_dir = '/home/yaokun/tmp'  #sys.argv[2]

        bname = basename(splitext(input_video_path)[0])

        # vehicle detect
        vehicle_threshold = .5

        vehicle_weights = 'data/vehicle-detector/yolo-voc.weights'
        vehicle_netcfg = 'data/vehicle-detector/yolo-voc.cfg'
        vehicle_dataset = 'data/vehicle-detector/voc.data'
        vehicle_weights = vehicle_weights.encode('ascii')
        vehicle_netcfg = vehicle_netcfg.encode('ascii')
        vehicle_dataset = vehicle_dataset.encode('ascii')

        vehicle_net = dn.load_net(vehicle_netcfg, vehicle_weights, 0)
        vehicle_meta = dn.load_meta(vehicle_dataset)

        # lp detect
        lp_threshold = .5

        wpod_net_path = 'data/lp-detector/bak/wpod-net_update1.h5'
        print(wpod_net_path)
        #wpod_net_path = wpod_net_path.encode('ascii')
        wpod_net = load_model(wpod_net_path)

        # ocr
        ocr_threshold = .4

        ocr_weights = 'data/ocr/ocr-net.weights'
        ocr_netcfg = 'data/ocr/ocr-net.cfg'
        ocr_dataset = 'data/ocr/ocr-net.data'
Example #16
    try:

        args = parse_args()

        input_dir = args.input_dir
        output_dir = args.output_dir

        vehicle_threshold = args.vehicle_threshold

        vehicle_weights = b'data/vehicle-detector/yolov3.weights'
        vehicle_netcfg = b'data/vehicle-detector/yolov3.cfg'
        # vehicle_netcfg = b'darknet/cfg/yolov3.cfg'
        vehicle_dataset = b'data/vehicle-detector/coco.data'

        vehicle_net = dn.load_net(vehicle_netcfg, vehicle_weights, 0)
        vehicle_meta = dn.load_meta(vehicle_dataset)

        imgs_paths = image_files_from_folder(input_dir)
        imgs_paths.sort()

        if not isdir(output_dir):
            makedirs(output_dir)

        print('Searching for vehicles using YOLO...')

        for i, img_path in enumerate(imgs_paths):

            print('\tScanning %s' % img_path)

            bname = basename(splitext(img_path)[0])
Example #17
def objectDetection(root, in_file, output_dir, classes):
    try:
        out_file = in_file.split('/')[-1].split('.')[0] + '_out'

        vehicle_threshold = .5

        vehicle_weights = 'object_detector/data/vehicle-detector/yolov2.weights'
        vehicle_netcfg = 'object_detector/data/vehicle-detector/yolov2.cfg'
        vehicle_dataset = 'object_detector/data/vehicle-detector/coco.data'

        file_ptr = open(output_dir + '/' + out_file + '_obj.txt', 'w+')

        vehicle_net = dn.load_net(vehicle_netcfg.encode('utf-8'),
                                  vehicle_weights.encode('utf-8'), 0)
        vehicle_meta = dn.load_meta(vehicle_dataset.encode('utf-8'))

        if 'person' in classes:
            gen_model = import_gender_model('./gender_model.h5')

        writer = None
        cap = cv2.VideoCapture(in_file)
        cnt = 0

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                root.statusStrVar.set(
                    'Done...Video saved at {}'.format(output_dir + '/' +
                                                      out_file + '.mp4'))
                break

            print('Processing frame {}..'.format(cnt))
            root.statusStrVar.set('Processing frame {}..'.format(cnt))

            img = nparray_to_image(frame)
            R, _ = detect(vehicle_net, vehicle_meta, img, vehicle_threshold)
            R = [r for r in R if r[0] in classes]
            print('\t%d objects found' % len(R))

            txt = 'Frame No : {}\n'.format(cnt)

            if len(R):
                WH = np.array(frame.shape[1::-1], dtype=float)

                for i, r in enumerate(R):
                    name = r[0]
                    confidence = r[1] * 100
                    cx, cy, w, h = (np.array(r[2]) / np.concatenate(
                        (WH, WH))).tolist()
                    tl = (int((cx - w / 2.) * WH[0]), int(
                        (cy - h / 2.) * WH[1]))
                    br = (int((cx + w / 2.) * WH[0]), int(
                        (cy + h / 2.) * WH[1]))

                    object_img = frame[tl[1]:tl[1] + int(h * WH[1]),
                                       tl[0]:tl[0] + int(w * WH[0])]

                    if name == 'car':
                        color = ''
                        if object_img.shape[0] > 0 and object_img.shape[1] > 0:
                            color = process_image(Image.fromarray(object_img))
                            # color = get_color(object_img)

                        txt += '\t Object Name, Coords : {}, ({}, {})\n'.format(
                            name, tl, br)
                        txt += '\t Object confidence : ' + '{:.2f}'.format(
                            confidence) + '\n'
                        txt += '\t Vehicle Color : {}\n\n'.format(color)

                        cv2.putText(
                            frame,
                            '{} ('.format(name) + '{:.2f}'.format(confidence) +
                            ', {})'.format(color), tl,
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2,
                            cv2.LINE_AA)
                    if name == 'person':
                        gender = ''
                        if object_img.shape[0] > 0 and object_img.shape[1] > 0:
                            gender = gender_detect(object_img, gen_model)

                        txt += '\t Object Name, Coords : {}, ({}, {})\n'.format(
                            name, tl, br)
                        txt += '\t Object confidence : ' + '{:.2f}'.format(
                            confidence) + '\n'
                        txt += '\t Person gender : {}\n\n'.format(gender)

                        cv2.putText(
                            frame,
                            '{} ('.format(name) + '{:.2f}'.format(confidence) +
                            ', {})'.format(gender), tl,
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 1,
                            cv2.LINE_AA)
                    if name != 'car' and name != 'person':
                        txt += '\t Object Name, Coords : {}, ({}, {})\n'.format(
                            name, tl, br)
                        txt += '\t Object confidence : ' + '{:.2f}'.format(
                            confidence) + '\n\n'

                        cv2.putText(
                            frame, '{} ('.format(name) +
                            '{:.2f}'.format(confidence) + ')', tl,
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2,
                            cv2.LINE_AA)

                    print('\t\t{}th Coords : ({}, {})'.format(i, tl, br))
                    cv2.rectangle(frame, tl, br, (255, 0, 0),
                                  2)  #crop_region(Iorig,label)

                file_ptr.write(txt)
                print('\n')

            if writer is None:
                fourcc = cv2.VideoWriter_fourcc(*'DIVX')
                writer = cv2.VideoWriter(output_dir + '/' + out_file + '.mp4',
                                         fourcc, 30,
                                         (frame.shape[1], frame.shape[0]),
                                         True)

            writer.write(frame)
            del frame
            cnt += 1

    except:
        traceback.print_exc()
        sys.exit(1)

    file_ptr.close()
    writer.release()
    cap.release()
    sys.exit(0)