def main():
    #----------------initialization----------------
    t0 = time.time()
    plt.ion()
    index = 0
    print("Loaded graph in {:.2f}s".format(time.time() - t0))
    capture = cv2.VideoCapture(input_video)

    # loading pre-trained model and config file
    net = cv2.dnn.readNet(wieght_arg, config_arg)
    # start to process
    ret, _ = capture.read()  # probe one frame to confirm the stream opened (note: this skips the first frame)

    # Is there any frame to read?
    while ret:
        index += 1
        ret, frame = capture.read()
        if not ret:  # stop once the stream runs out of frames
            break
        img = copy.deepcopy(frame)
        # applying transformations and appropriate changes to the frame before feeding it to the loaded model
        # ---- this section is intentionally left empty when using OpenCV's dnn module ----

        t0 = time.time()
        # feeding tensor to loaded model and obtaining the bounding boxes of detected objects
        roi_boxes, roi_confidences, roi_class, roi_indices = detection(
            frame, net)
        # drawing the boxes around the detected objects and saving the objects simultaneously
        for i in roi_indices:
            i = i[0]  # older cv2.dnn.NMSBoxes versions wrap each index in a 1-element array
            for j, v in enumerate(roi_boxes[i]):
                if v < 0:
                    roi_boxes[i][j] = 0

            box = roi_boxes[i]
            x = round(box[0])
            y = round(box[1])
            w = round(box[2])
            h = round(box[3])

            # cropping the bounding box of each detected object out of the frame and saving it to the './extracted_objects/' directory
            cv2.imwrite(
                './extracted_objects/frame_' + str(index) + '_obj_' + str(i) +
                '.jpg', img[y:y + h, x:x + w])

            draw_bounding_box(frame, roi_class[i], roi_confidences[i], x, y,
                              x + w, y + h)

        print("Predictions found in {:.2f}s".format(time.time() - t0))

        # show results of processing each frame to user
        plt.imshow(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        plt.xticks([]), plt.yticks([])
        plt.pause(0.02)
        plt.show()
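
The detection() helper called above is not shown in this example. A minimal sketch of what it could look like with OpenCV's dnn module follows; the 416x416 input size, the scale factor, and the score/NMS thresholds are assumptions, and the real helper may differ. With older OpenCV releases, cv2.dnn.NMSBoxes returns each kept index wrapped in a 1-element array, which matches the i = i[0] unwrapping in the loop above.

import cv2
import numpy as np

def detection(frame, net, conf_threshold=0.5, nms_threshold=0.4):
    """Sketch of a YOLO-style forward pass returning pixel-space boxes,
    confidences, class ids and the indices kept by NMS."""
    height, width = frame.shape[:2]
    blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (416, 416),
                                 swapRB=True, crop=False)
    net.setInput(blob)
    layer_outputs = net.forward(net.getUnconnectedOutLayersNames())

    boxes, confidences, class_ids = [], [], []
    for output in layer_outputs:
        for det in output:
            scores = det[5:]
            class_id = int(np.argmax(scores))
            confidence = float(scores[class_id])
            if confidence > conf_threshold:
                # YOLO outputs are normalized center/size values.
                cx, cy, w, h = det[0:4] * np.array([width, height, width, height])
                boxes.append([int(cx - w / 2), int(cy - h / 2), int(w), int(h)])
                confidences.append(confidence)
                class_ids.append(class_id)

    indices = cv2.dnn.NMSBoxes(boxes, confidences, conf_threshold, nms_threshold)
    return boxes, confidences, class_ids, indices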
Example #2
def annotate_image(image, idxs, boxes, COLORS, LABELS, confidences, classIDs):
    bounding_boxes = []
    if len(idxs) > 0:
        for i in idxs.flatten():
            (x, y) = (boxes[i][0], boxes[i][1])
            (w, h) = (boxes[i][2], boxes[i][3])

            bounding_boxes.append(boxes[i])

            color = [int(c) for c in COLORS[classIDs[i]]]
            label = LABELS[classIDs[i]]

            draw_bounding_box(image, color, label, confidences[i], x, y, w, h)

    return bounding_boxes
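
The draw_bounding_box helper used here takes the color, label and confidence explicitly. A plausible sketch, assuming w and h are the box width and height and that the exact font settings are not known, is:

import cv2

def draw_bounding_box(image, color, label, confidence, x, y, w, h):
    # Draw the rectangle and a "label: confidence" caption just above it.
    cv2.rectangle(image, (x, y), (x + w, y + h), color, 2)
    text = "{}: {:.2f}".format(label, confidence)
    cv2.putText(image, text, (x, max(y - 5, 10)),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)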
Example #3
def main():
	# start testing
	for file in dirs:
		image_path = os.path.join(text_floder, os.path.basename(file))
		#print(os.path.basename(file))
		#print(image_path)
		# load the RGB image and the grayscale image
		rgb_image = load_image(image_path, grayscale=False, color_mode = "rgb")
		gray_image = load_image(image_path, grayscale=True,color_mode = "grayscale")
		# squeeze out the size-1 channel dimension (keep only height and width)
		gray_image = np.squeeze(gray_image)
		gray_image = gray_image.astype("uint8")
		faces = detect_faces(face_detection, gray_image)
		#print("-----")
		#print(len(faces))
		for face_coordinates in faces:
			# get the two diagonal corner coordinates of the face rectangle in the image
			x1, x2, y1, y2 = get_coordinates(face_coordinates)
			#print(x1, x2, y1, y2 )
			# crop the face region out of the pixel array
			gray_face = gray_image[y1:y2, x1:x2]

			try:
				# resize the face to the input size required by the emotion model
				gray_face = cv2.resize(gray_face,(emotion_target_size))
			except Exception:
				# skip faces whose crop is empty or cannot be resized
				continue

			gray_face = preprocessing_input(gray_face)
			gray_face = np.expand_dims(gray_face, 0)
			gray_face = np.expand_dims(gray_face, -1)
			# print(gray_face.shape)

			# predict the emotion
			emotion_label_arg = np.argmax(emotion_classifier.predict(gray_face))
			emotion_text = emotion_labels[emotion_label_arg]

			color = (255,0,0)
			# draw the bounding box
			draw_bounding_box(face_coordinates, rgb_image, color)
			# draw the emotion label
			draw_text(face_coordinates, rgb_image, emotion_text, color, 0, face_coordinates[3]+30, 1, 2)

			# convert the image to BGR before saving
			bgr_image = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2BGR)
			cv2.imwrite("./pic_test/"+"predict"+os.path.basename(file), bgr_image)

			cv2.waitKey(1)
			cv2.destroyAllWindows()

	print("已识别%d张图片" % int(len(dirs)))
Example #4
        im_dim_batches = [
            torch.cat((im_dim_list[i * args.batch_size:min(
                (i + 1) * args.batch_size, len(im_batches))]))
            for i in range(num_batches)
        ]

    output = []
    for i, batch in enumerate(im_batches):
        start = time.time()
        with torch.no_grad():
            prediction, _ = model(batch)

        prediction = utils.non_max_suppression(prediction, args.conf_thresh,
                                               args.nms_thresh)
        end = time.time()
        print("The inference time of batch %d is %.3f" % (i, end - start))
        output.extend(prediction)

    colors = utils.get_cmap()

    for i in range(len(output)):
        if output[i] is not None:
            res = utils.recover_img_size(output[i], im_dim_list[i],
                                         args.img_size)
            list(
                map(
                    lambda x: utils.draw_bounding_box(x, loaded_ims[i], colors,
                                                      classes), res))
            name = os.path.join(args.output_path,
                                'det_' + os.path.basename(imlist[i]))
            cv2.imwrite(name, loaded_ims[i])
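
Each row of res above is one detection. A hedged sketch of the utils.draw_bounding_box used in this snippet, assuming the common PyTorch YOLO row layout [x1, y1, x2, y2, obj_conf, cls_conf, cls_idx] and that colors is an indexable list of BGR tuples, could be:

import cv2

def draw_bounding_box(detection, image, colors, classes):
    # Assumed row layout: [x1, y1, x2, y2, obj_conf, cls_conf, cls_idx].
    x1, y1, x2, y2 = [int(v) for v in detection[:4]]
    cls_idx = int(detection[-1])
    color = colors[cls_idx % len(colors)]
    cv2.rectangle(image, (x1, y1), (x2, y2), color, 2)
    cv2.putText(image, classes[cls_idx], (x1, max(y1 - 5, 10)),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
    return image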
Example #5

# initialize the camera and grab a reference to the raw camera capture
cap = cv.VideoCapture('udpsrc port=5000 ! application/x-rtp, payload=96 ! rtpjitterbuffer ! rtph264depay ! avdec_h264  ! videoconvert  ! queue ! appsink sync=false ', cv.CAP_GSTREAMER)
detector = dlib.get_frontal_face_detector()

start = time.time()
frame_id = 0
# capture frames from the camera
while True:
    ret, frame = cap.read()
    if not ret:  # stop if the stream ended or a frame could not be read
        break
    faces = detector(frame, 1)

    for rect in faces:
        (x, y, w, h) = rect_to_bb(rect)
        x1, x2, y1, y2 = apply_offsets((x, y, w, h), (20, 40))
        color = (0, 255, 0)
        draw_bounding_box(image=frame, coordinates=(x1, y1, x2 - x1, y2 - y1), color=color)

    num_faces = len(faces)
    end = time.time()
    seconds = end - start
    fps = 1.0 / seconds
    draw_str(frame, (20, 20), 'fps: %d' % (fps))

    # show the frame
    cv.imshow("Frame", frame)
    key = cv.waitKey(1) & 0xFF

    start = time.time()
    frame_id += 1

    # if the `q` key was pressed, break from the loop
    if key == ord("q"):
        break
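
Two helpers used in this capture loop are not shown. rect_to_bb converts a dlib rectangle to an OpenCV-style (x, y, w, h) tuple, and apply_offsets pads the box; the symmetric padding below is an assumption. Both sketches return coordinates in the order the loop above expects.

def rect_to_bb(rect):
    # Convert a dlib rectangle to an OpenCV-style (x, y, w, h) tuple.
    x = rect.left()
    y = rect.top()
    return (x, y, rect.right() - x, rect.bottom() - y)

def apply_offsets(face_coordinates, offsets):
    # Expand the (x, y, w, h) box by (x_off, y_off) on each side and
    # return the corners in the (x1, x2, y1, y2) order used above.
    x, y, w, h = face_coordinates
    x_off, y_off = offsets
    return (x - x_off, x + w + x_off, y - y_off, y + h + y_off)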
Example #6

def index():

    print('Request-form', list(request.form.keys()), file=sys.stderr)
    print('Request-form-name', request.form['name'], file=sys.stderr)
    # print('Request-form-image',request.form['image'],file=sys.stderr)

    image_name = request.form['name']
    image_string = request.form['image']
    #image_bytes = bytes(image_string,'utf-8')
    #image_decoded = base64.decodestring(image_string)

    image = Image.open(BytesIO(base64.b64decode(image_string)))

    # rotated_image = image.rotate(270,expand=True)

    # input_array = np.array(rotated_image)

    input_array = np.array(image)

    input_array = np.expand_dims(input_array, axis=0)

    #result_array = detect.run(input_array)

    (_boxes, _scores, _classes,
     _masks) = sess.run([boxes, scores, classes, masks],
                        feed_dict={input_: input_array})

    _boxes = np.squeeze(_boxes, axis=0)
    _scores = np.squeeze(_scores, axis=0)
    _classes = np.squeeze(_classes, axis=0)
    _masks = np.squeeze(_masks, axis=0)
    input_array = np.squeeze(input_array, axis=0)

    detections = utils.get_detections(_scores, config.threshold_score)

    utils.draw_bounding_box(input_array, detections, _boxes, _classes,
                            class_map, _masks)

    result_image = Image.fromarray(input_array)

    #print('rotated_image.shape = ',input_array.shape)

    result_image.save('output.jpg', format='JPEG')

    #convert image back to string..
    buffered = BytesIO()
    result_image.save(buffered, format="JPEG")
    final_img_str = base64.b64encode(buffered.getvalue())

    #     print('Request-files:',request.files,file=sys.stderr)
    #     print('Requestfiletype:',type(request.files),file=sys.stderr)

    #     data = request.files.to_dict()

    #     print('data',data,file=sys.stderr)

    #     #to-do Input file validation... (ensure input file is valid jpg or png)
    #     file = data['upload']

    #     print('File name:',file.filename,file=sys.stderr)

    #     file_path = os.path.join("Images",file.filename)

    #     file.save(file_path)

    #     print('File saved with name:',file.filename,file=sys.stderr)

    #Deserialize the image..
    #     with open(image_name,'wb') as image_file:
    #         image_file.write(image)

    response = final_img_str

    # print("Returning Image Response...",file=sys.stderr)

    return response
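
A client could exercise this endpoint roughly as follows. The 'name' and 'image' fields mirror the request.form keys read above; the URL, route and use of POST are placeholders, since the route decorator is not shown.

import base64
import requests

with open("test.jpg", "rb") as f:
    img_b64 = base64.b64encode(f.read()).decode("utf-8")

# The endpoint returns the annotated image as a base64 string.
resp = requests.post("http://localhost:5000/",
                     data={"name": "test.jpg", "image": img_b64})

with open("annotated.jpg", "wb") as f:
    f.write(base64.b64decode(resp.content))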
Example #7
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network,
    and output stats and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """

    # Initialise the class
    infer_network = Network()

    # Set Probability threshold for detections
    prob_threshold = args.prob_threshold

    if 'faster' in args.model:
        faster_rnn = True
    else:
        faster_rnn = False

    ### TODO: Load the model through `infer_network` ###
    infer_network.load_model(args.model,
                             device=args.device,
                             cpu_extension=args.cpu_extension)

    # We need model required input dimensions:
    required_input_shape = infer_network.get_input_shape(faster_rnn=faster_rnn)
    required_input_width = required_input_shape[2]
    required_input_height = required_input_shape[3]

    ### TODO: Handle the input stream ###
    if args.input != 'CAM':
        try:
            # OpenCV's VideoCapture can open both videos and images:
            input_stream = cv2.VideoCapture(args.input)
            length = int(input_stream.get(cv2.CAP_PROP_FRAME_COUNT))
            webcamera = False

            # Check if input is an image or video file:
            if length > 1:
                single_image_mode = False
            else:
                single_image_mode = True

        except:
            print(
                'Unsupported image or video file format. Please pass a supported one.'
            )
            exit()

    else:
        input_stream = cv2.VideoCapture(0)
        single_image_mode = False
        webcamera = True

    # We need fps for time related calculations:
    fps = input_stream.get(cv2.CAP_PROP_FPS)

    # We also need input stream width and height:
    stream_width = int(input_stream.get(3))
    stream_height = int(input_stream.get(4))

    not_in_frame = 0  # Counter of consecutive frames without a detection (used with Faster R-CNN).
    frames_for_quit = 10  # Number of consecutive empty frames before we consider a person completely out of frame.

    if not single_image_mode:
        ### TODO: Loop until stream is over ###
        # These are tuning values and others required for the counter logic:

        ## Tuning, could be asked as possible arguments:
        LOWER_HALF = 0.7  # Normalized-height threshold: a centroid with y above this value is considered to be in the "lower half".
        RIGHT_HALF = 0.8  # Normalized-width threshold: a centroid with x above this value is considered to be in the "right half". 0.87 also works but is too extreme.
        DETECTION_FRAMES = 1  # The detection model is run whenever count_frame is divisible by this number.

        count_frame = 0  # Frame counter.
        status_lower_half = False  # Status of the lower half.
        status_upper_half = False  # Status of the upper half.
        id = 0  # Identifier for people.
        current_person = []  # For storing current person in frame.
        current_time = [0]  # For storing last recorded time.

        # Params to send to MQTT Server:
        total_counted = 0  # People counter.
        people_in_frame = 0  # People in frame status.

        while (input_stream.isOpened()):

            ### TODO: Read from the video capture ###
            # Read the next frame:
            flag, frame = input_stream.read()

            # Quit if there is no more stream:
            if not flag:
                break

            # Quit if 'q' is pressed:
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

            # Execute detection model if required in this frame:
            if count_frame % DETECTION_FRAMES == 0:

                ### TODO: Pre-process the image as needed ###
                preprocessed_frame = utils.handle_image(
                    frame,
                    width=required_input_width,
                    height=required_input_height)

                ### TODO: Start asynchronous inference for specified request ###
                infer_network.exec_net(preprocessed_frame,
                                       faster_rnn=faster_rnn)

                ### TODO: Wait for the result ###
                status = infer_network.wait()
                if status == 0:  # Wait until we have results.
                    prev_results = infer_network.get_output()  # Get outputs.

                    ### TODO: Get the results of the inference request ###
                    results_bb = []
                    for p_r in prev_results[0, 0]:  # Iterate over outputs.
                        if p_r[2] >= args.prob_threshold and p_r[
                                1] == 1.0:  # Filter relevant outputs. p_r[1]==1: check only for people.
                            results_bb.append(
                                p_r[3:])  # Save those relevant results.

                    ### TODO: Extract any desired stats from the results ###
                    if not faster_rnn:  # Faster R-CNN has better detection capabilities, so it is not processed the same way.
                        if len(results_bb) > 0:
                            for detection in results_bb:  # Iterate through each detection:
                                centroid = utils.calculate_centroid(detection)
                                frame = utils.draw_bounding_box(
                                    frame, detection)

                                if centroid[
                                        1] > LOWER_HALF and status_lower_half == False and status_upper_half == False:  # Meaning there is a new detection in the lower border.
                                    status_lower_half = True
                                    person = utils.Person(
                                        id=id, frame_init=count_frame)
                                    current_person.append(person)
                                    total_counted = total_counted + 1
                                    id = id + 1
                                elif status_lower_half:
                                    status_lower_half = False
                                    status_upper_half = True

                                # To check that there is a detection in one of the halves:
                                people_in_frame = status_upper_half + status_lower_half

                                if centroid[
                                        0] > RIGHT_HALF and status_upper_half == True:
                                    status_lower_half = False
                                    status_upper_half = False
                                    people_in_frame = 0
                                    current_time[0] = (
                                        count_frame -
                                        current_person[0].frame_init) / fps

                                    current_person = []
                                    client.publish(
                                        "person/duration",
                                        json.dumps(
                                            {"duration": current_time[0]}))
                    else:  # Using the Faster R-CNN model:
                        if len(results_bb) == 0:
                            not_in_frame = not_in_frame + 1
                            if not_in_frame >= frames_for_quit and current_person:
                                not_in_frame = 0
                                people_in_frame = 0
                                if current_person:
                                    # Subtracting 'frames_for_quit' because we stopped detecting this person that many frames ago.
                                    current_time[0] = (
                                        count_frame -
                                        current_person[0].frame_init -
                                        frames_for_quit) / fps
                                current_person = []
                                client.publish(
                                    "person/duration",
                                    json.dumps({"duration": current_time[0]}))
                        else:
                            people_in_frame = 1
                            not_in_frame = 0
                            for detection in results_bb:  # Iterate through each detection:
                                frame = utils.draw_bounding_box(
                                    frame, detection)

                            if not current_person:  # Meaning that there is no recorded person.
                                person = utils.Person(id=id,
                                                      frame_init=count_frame)
                                current_person.append(person)
                                total_counted = total_counted + 1
                                id = id + 1

            ### TODO: Calculate and send relevant information on ###
            ### current_count, total_count and duration to the MQTT server ###
            ### Topic "person": keys of "count" and "total" ###
            ### Topic "person/duration": key of "duration" ###
            client.publish(
                "person",
                json.dumps({
                    "count": people_in_frame,
                    "total": total_counted
                }))

            # Additional feature: Change timer color when a person is more than 15 secs on screen.
            if people_in_frame:
                current_time[0] = (count_frame -
                                   current_person[0].frame_init) / fps
                if current_time[0] > 15:
                    font_color = (0, 0, 255)
                else:
                    font_color = (0, 0, 0)
                frame = utils.draw_text(frame,
                                        "Current person: " +
                                        str(current_time[0]) + " secs",
                                        font_color=font_color)
            else:
                if current_time[0] > 15:
                    font_color = (0, 0, 255)
                else:
                    font_color = (0, 0, 0)
                frame = utils.draw_text(frame,
                                        "Last person: " +
                                        str(current_time[0]) + " secs",
                                        font_color=font_color)

            ### TODO: Send the frame to the FFMPEG server ###

            if not webcamera:
                sys.stdout.buffer.write(frame)
            else:
                cv2.imshow('Result', frame)

            count_frame = count_frame + 1

        # Release resources:
        input_stream.release()
    else:
        flag, frame = input_stream.read()
        preprocessed_frame = utils.handle_image(frame,
                                                width=required_input_width,
                                                height=required_input_height)

        infer_network.exec_net(preprocessed_frame)

        status = infer_network.wait()
        if status == 0:  # Wait until we have results.
            prev_results = infer_network.get_output()  # Get outputs.

            results_bb = []
            for p_r in prev_results[0, 0]:  # Iterate over outputs.
                if p_r[2] >= args.prob_threshold and p_r[
                        1] == 1.0:  # Filter relevant outputs. p_r[1]==1: check only for people.
                    results_bb.append(p_r[3:])  # Save those relevant results.

            if len(results_bb) > 0:
                for detection in results_bb:  # Iterate through each detection:
                    frame = utils.draw_bounding_box(frame, detection)

            frame = utils.draw_text(frame,
                                    "People in Frame: " + str(len(results_bb)),
                                    coordinates=(0.05, 0.05))
            cv2.imwrite('result_single_image.png', frame)

    cv2.destroyAllWindows()

    # Disconnect from MQTT:
    client.disconnect()
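
The counter logic above compares each detection's centroid against LOWER_HALF and RIGHT_HALF. A sketch of utils.calculate_centroid, assuming detections are normalized [xmin, ymin, xmax, ymax] boxes as in SSD-style OpenVINO outputs, could be:

def calculate_centroid(detection):
    # detection is assumed to be a normalized [xmin, ymin, xmax, ymax] box;
    # the centroid is returned as (x_center, y_center) in the 0-1 range.
    xmin, ymin, xmax, ymax = detection[:4]
    return ((xmin + xmax) / 2.0, (ymin + ymax) / 2.0)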
Example #8
def index():

    print('Request-form', list(request.form.keys()), file=sys.stderr)
    #print('Request-form-name',request.form['name'],file=sys.stderr)
    # print('Request-form-image',request.form['image'],file=sys.stderr)

    #image_name = request.form['name']
    image_string = request.form['image']
    #image_bytes = bytes(image_string,'utf-8')
    #image_decoded = base64.decodestring(image_string)

    image = Image.open(BytesIO(base64.b64decode(image_string)))

    # rotated_image = image.rotate(270,expand=True)

    # input_array = np.array(rotated_image)

    input_array = np.array(image)

    input_array = np.expand_dims(input_array, axis=0)

    detection_graph = tf.Graph()

    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(config.mask_model_infer_path,
                            mode='rb') as graph_file:
            serialized_graph = graph_file.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def)

    class_map = utils.get_class_map(config.class_map_file)

    with tf.Session(graph=detection_graph) as sess:
        input_ = sess.graph.get_tensor_by_name("import/image_tensor:0")
        boxes = sess.graph.get_tensor_by_name("import/detection_boxes:0")
        scores = sess.graph.get_tensor_by_name("import/detection_scores:0")
        classes = sess.graph.get_tensor_by_name("import/detection_classes:0")
        masks = sess.graph.get_tensor_by_name("import/detection_masks:0")

        #result_array = detect.run(input_array)

        (_boxes, _scores, _classes,
         _masks) = sess.run([boxes, scores, classes, masks],
                            feed_dict={input_: input_array})

    _boxes = np.squeeze(_boxes, axis=0)
    _scores = np.squeeze(_scores, axis=0)
    _classes = np.squeeze(_classes, axis=0)
    _masks = np.squeeze(_masks, axis=0)
    input_array = np.squeeze(input_array, axis=0)

    detections = utils.get_detections(_scores, config.threshold_score)

    utils.draw_bounding_box(input_array, detections, _boxes, _classes,
                            class_map, _masks)

    result_image = Image.fromarray(input_array)

    #print('rotated_image.shape = ',input_array.shape)

    #result_image.save('output.jpg',format='JPEG')

    #convert image back to string..
    buffered = BytesIO()
    result_image.save(buffered, format="JPEG")
    final_img_str = base64.b64encode(buffered.getvalue())

    #     print('Request-files:',request.files,file=sys.stderr)
    #     print('Requestfiletype:',type(request.files),file=sys.stderr)

    #     data = request.files.to_dict()

    #     print('data',data,file=sys.stderr)

    #     #to-do Input file validation... (ensure input file is valid jpg or png)
    #     file = data['upload']

    #     print('File name:',file.filename,file=sys.stderr)

    #     file_path = os.path.join("Images",file.filename)

    #     file.save(file_path)

    #     print('File saved with name:',file.filename,file=sys.stderr)

    #Deserialize the image..
    #     with open(image_name,'wb') as image_file:
    #         image_file.write(image)

    response = final_img_str

    # print("Returning Image Response...",file=sys.stderr)

    # tf.reset_default_graph()

    return response
Example #9
import streamlit as st
import base64
import json
import requests
import numpy as np

from utils import FILE_TYPES, IP_ADDRESS, draw_bounding_box, bytes_to_PIL_image

st.set_option('deprecation.showfileUploaderEncoding', False)

st.title("Mask RCNN with FastAPI")
file_buffer = st.file_uploader("Please upload an image", type=FILE_TYPES)

if file_buffer is not None:
    img_bytes = file_buffer.read()
    st.image(img_bytes, caption="Test image")

if st.button("Detect Objects"):
    if file_buffer is None:
        st.write("No image uploaded...")
    else:
        img = bytes_to_PIL_image(img_bytes)
        img_bytes = base64.b64encode(img_bytes)
        img_bytes = img_bytes.decode("utf-8")
        payload = json.dumps({"img_bytes": img_bytes})
        res = requests.put(IP_ADDRESS, payload)
        json_object = res.json()
        img = np.asarray(img)
        img = draw_bounding_box(img, json_object["boxes"],
                                json_object["classes"])
        st.image(img, caption="Processed image")
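
The Streamlit client above PUTs a JSON payload with an img_bytes field and expects boxes and classes in the response. A minimal sketch of a matching FastAPI endpoint follows; the route, the ImagePayload model and run_mask_rcnn are placeholders, not the project's actual server code.

import base64
from fastapi import FastAPI
from pydantic import BaseModel

app = FastAPI()

class ImagePayload(BaseModel):
    img_bytes: str  # base64-encoded image, matching the client's payload

@app.put("/")
def detect(payload: ImagePayload):
    image_bytes = base64.b64decode(payload.img_bytes)
    # run_mask_rcnn is a placeholder for the actual Mask R-CNN inference step.
    boxes, classes = run_mask_rcnn(image_bytes)
    return {"boxes": boxes, "classes": classes}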
Example #10

        gray_face = np.expand_dims(gray_face, -1)

        emotion_prediction = emotion_classifier.predict(gray_face)
        # emotion_probability = np.max(emotion_prediction)
        emotion_label_arg = np.argmax(emotion_prediction,axis=1)

        emotion_text = class_names[int(emotion_label_arg)]
        picType,prob = face_model.predict(face_recog)
        if picType != -1:
            name_list = read_name_list('/Users/gaoxingyun/Documents/uw/courses/Sp19/EE576_CV/project/realtime_emotion_recognition/dataset')
            print (name_list[picType],prob)
            face_text = name_list[picType]
        else:
            print (" Don't know this person")
            face_text = 'unknown'

        color = (0,255,0)

        draw_bounding_box(face_coordinates, rgb_image, color)
        draw_text(face_coordinates, rgb_image, emotion_text,
                  color, 0, 45, 1, 1)
        draw_text(face_coordinates, rgb_image, face_text, color, 0, -45, 1, 1)

    bgr_image = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2BGR)
    cv2.imshow('window_frame', bgr_image)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
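
The emotion-recognition snippets above (Example #3 and the final fragment) both rely on draw_bounding_box(face_coordinates, image, color) and draw_text(...). Hedged sketches of these two helpers, assuming face_coordinates is an (x, y, w, h) tuple and that the exact font settings are not known, are:

import cv2

def draw_bounding_box(face_coordinates, image_array, color):
    # Draw the face rectangle in place on the RGB image array.
    x, y, w, h = face_coordinates
    cv2.rectangle(image_array, (x, y), (x + w, y + h), color, 2)

def draw_text(coordinates, image_array, text, color,
              x_offset=0, y_offset=0, font_scale=1, thickness=1):
    # Put the label text near the top-left corner of the face box,
    # shifted by the given offsets.
    x, y = coordinates[:2]
    cv2.putText(image_array, text, (x + x_offset, y + y_offset),
                cv2.FONT_HERSHEY_SIMPLEX, font_scale, color, thickness,
                cv2.LINE_AA)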