def vid2frames(vid, oflow, pred_type, mul_oflow, oflow_pnum):
    """Read all frames from ``vid`` and build the RGB / optical-flow inputs.

    Every frame that can be read is mirrored horizontally and resized to
    224x224 before being collected.

    Args:
        vid: Object whose ``read()`` returns ``(success, frame)``
            (presumably a ``cv2.VideoCapture`` -- confirm against callers).
        oflow: When truthy, optical flow is computed from the RGB frames.
        pred_type: When equal to ``"sentence"`` and at least 40 frames were
            read, the RGB frames are NOT passed through ``frames_downsample``.
        mul_oflow: When truthy, flows come from
            ``process_oflow(frames, oflow_pnum)``; otherwise from
            ``frames2flows(frames)``.
        oflow_pnum: Forwarded unchanged to ``process_oflow``.

    Returns:
        ``(rgbFrames, oflowFrames, frame_num)`` where ``rgbFrames`` is the
        output of ``images_rescale``, ``oflowFrames`` is ``[]`` when ``oflow``
        is falsy, and ``frame_num`` is the number of frames read.

    Raises:
        ValueError: If not a single frame could be read from ``vid``.
    """
    from frame import frames_downsample, images_rescale
    from opticalflow import frames2flows
    from oflow_multiprocessing import process_oflow

    nFramesNorm = 40
    tuRectangle = (224, 224)

    def _compute_flows(frames):
        # Multi-process or single-process optical flow, chosen by the caller.
        if mul_oflow:
            return process_oflow(frames, oflow_pnum)
        return frames2flows(frames)

    rgbFrames, oflowFrames = [], []

    # Read until the stream reports failure. A failed read simply ends the
    # loop, so no per-frame "corrupted" warning can ever be emitted here.
    while True:
        success, frame = vid.read()
        if not success:
            break
        frame = cv2.flip(frame, 1)
        frame = cv2.resize(frame, tuRectangle, interpolation=cv2.INTER_AREA)
        rgbFrames.append(frame)

    frame_num = len(rgbFrames)
    if not rgbFrames:
        raise ValueError("Could not extract webcam frames successfully.")

    if frame_num < nFramesNorm:
        # Short clip: flows are computed on the raw frames first, then both
        # sequences go through frames_downsample with the 40-frame target.
        if oflow:
            oflowFrames = frames_downsample(_compute_flows(rgbFrames), nFramesNorm)
        rgbFrames = frames_downsample(np.array(rgbFrames), nFramesNorm)
    else:
        # Long clip: sentence prediction keeps every frame; otherwise the RGB
        # frames are reduced first and flows are computed on the result.
        if pred_type != "sentence":
            rgbFrames = frames_downsample(np.array(rgbFrames), nFramesNorm)
        if oflow:
            oflowFrames = _compute_flows(rgbFrames)

    rgbFrames = images_rescale(rgbFrames)

    return rgbFrames, oflowFrames, frame_num
Example #2
0
def vid2frames(vid):
    """Read all frames from ``vid`` and return normalized RGB and flow frames.

    Each readable frame is mirrored horizontally, resized to 224x224, stored,
    and fed to an ``OpticalFlow(bThirdChannel=False)`` instance via ``next``
    to obtain the matching flow frame.

    Args:
        vid: Object whose ``read()`` returns ``(success, frame)``
            (presumably a ``cv2.VideoCapture`` -- confirm against callers).

    Returns:
        ``(rgbFrames, oflowFrames)``: the RGB frames after
        ``image_normalize(..., 40)`` and the flow frames after
        ``frames_downsample(..., 40)``.
    """
    oOpticalFlow = OpticalFlow(bThirdChannel=False)
    tuRectangle = (224, 224)
    rgbFrames, oflowFrames = [], []

    # Read until the stream reports failure; a failed read ends the loop, so
    # there is no point at which a per-frame warning could be printed.
    while True:
        success, frame = vid.read()
        if not success:
            break

        frame = cv2.flip(frame, 1)
        frame = cv2.resize(frame, tuRectangle, interpolation=cv2.INTER_AREA)
        rgbFrames.append(frame)

        # Flow is computed incrementally, one frame at a time.
        oflowFrames.append(oOpticalFlow.next(frame))

    rgbFrames = image_normalize(np.array(rgbFrames), 40)
    oflowFrames = frames_downsample(np.array(oflowFrames), 40)

    return rgbFrames, oflowFrames
def framesDir2flowsDir(sFrameBaseDir:str, sFlowBaseDir:str, nFramesNorm:int = None, sAlgorithm:str = "tvl1-fast"):
    """Calculate optical flow from frames (extracted from videos).

    Input videoframe structure:
    ... sFrameBaseDir / train / class001 / videoname / frames.jpg

    Output:
    ... sFlowBaseDir / train / class001 / videoname / flow.jpg

    Args:
        sFrameBaseDir: Root directory; every entry three levels below it
            (``*/*/*``) is treated as one video's frame directory.
        sFlowBaseDir: Root directory under which the flows are written,
            mirroring the relative path of each frame directory.
        nFramesNorm: If not ``None``, frames are passed through
            ``frames_downsample`` with this target, and a flow directory that
            already holds exactly this many files is skipped.
        sAlgorithm: Forwarded to ``frames2flows`` as ``sAlgorithm``.

    Returns:
        None. Progress is reported with ``print``.
    """
    # Collect video directories relative to the frame base dir. The working
    # directory is restored even if globbing fails, so a failure here cannot
    # leave the process stranded in sFrameBaseDir.
    sCurrentDir = os.getcwd()
    os.chdir(sFrameBaseDir)
    try:
        liVideos = sorted(glob.glob("*/*/*"))
    finally:
        os.chdir(sCurrentDir)
    print("Found %d directories=videos with frames in %s" % (len(liVideos), sFrameBaseDir))

    # loop over all video directories
    for nCounter, sFrameDir in enumerate(liVideos):

        # target directory mirrors the relative source path
        sFlowDir = sFlowBaseDir + "/" + sFrameDir

        # skip videos whose flows were already fully extracted
        if nFramesNorm is not None and os.path.exists(sFlowDir):
            nFlows = len(glob.glob(sFlowDir + "/*.*"))
            if nFlows == nFramesNorm:
                print("Video %5d: optical flow already extracted to %s" % (nCounter, sFlowDir))
                continue
            print("Video %5d: Directory with %d instead of %d flows detected" % (nCounter, nFlows, nFramesNorm))

        # retrieve frame files
        arFrames = files2frames(sFrameBaseDir + "/" + sFrameDir)

        # downsample
        if nFramesNorm is not None:
            arFrames = frames_downsample(arFrames, nFramesNorm)

        # calculate and save optical flow
        print("Video %5d: Calc optical flow with %s from %s frames to %s" % (nCounter, sAlgorithm, str(arFrames.shape), sFlowDir))
        arFlows = frames2flows(arFrames, sAlgorithm = sAlgorithm)
        flows2file(arFlows, sFlowDir)
Example #4
0
def livedemo():
    """Run the interactive webcam gesture-recognition demo.

    Loads the class list and the optical-flow I3D model, opens the webcam
    stream, then loops: pressing <blank> records a clip, converts it to
    optical flow and prints/overlays the predicted sign; pressing ``q``
    quits. The stream and all OpenCV windows are released on exit, including
    when an exception escapes the loop.
    """
    # dataset
    diVideoSet = {
        "sName": "chalearn",
        "nClasses": 20,  # number of classes
        "nFramesNorm": 40,  # number of frames per video
        "nMinDim": 240,  # smaller dimension of saved video-frames
        "tuShape": (240, 320),  # height, width
        "nFpsAvg": 10,
        "nFramesAvg": 50,
        "fDurationAvg": 5.0  # seconds
    }

    # files
    sClassFile = "data-set/%s/%03d/class.csv" % (diVideoSet["sName"],
                                                 diVideoSet["nClasses"])

    print("\nStarting gesture recognition live demo ... ")
    print(os.getcwd())
    print(diVideoSet)

    # load label description
    oClasses = VideoClasses(sClassFile)

    sModelFile = "model/20180627-0729-chalearn020-oflow-i3d-entire-best.h5"
    h, w = 224, 224
    keI3D = I3D_load(sModelFile, diVideoSet["nFramesNorm"], (h, w, 2),
                     oClasses.nClasses)

    sResults = ""
    timer = Timer()

    # open a pointer to the webcam video stream
    oStream = video_start(device=1,
                          tuResolution=(320, 240),
                          nFramePerSecond=diVideoSet["nFpsAvg"])

    try:
        # loop over action states
        while True:
            # show live video and wait for key stroke
            key = video_show(oStream,
                             "green",
                             "Press <blank> to start",
                             sResults,
                             tuRectangle=(h, w))

            # quit
            if key == ord('q'):
                break

            # anything other than <blank> just keeps showing the live video
            if key != ord(' '):
                continue

            # countdown n sec
            video_show(oStream,
                       "orange",
                       "Recording starts in ",
                       tuRectangle=(h, w),
                       nCountdown=3)

            # record video for n sec
            fElapsed, arFrames, _ = video_capture(
                oStream, "red", "Recording ",
                tuRectangle=(h, w),
                nTimeDuration=int(diVideoSet["fDurationAvg"]),
                bOpticalFlow=False)
            print("\nCaptured video: %.1f sec, %s, %.1f fps" %
                  (fElapsed, str(arFrames.shape), len(arFrames) / fElapsed))

            # show orange wait box
            frame_show(oStream,
                       "orange",
                       "Translating sign ...",
                       tuRectangle=(h, w))

            # crop and downsample frames
            arFrames = images_crop(arFrames, h, w)
            arFrames = frames_downsample(arFrames, diVideoSet["nFramesNorm"])

            # Translate frames to flows (original author's note: these are
            # already scaled between [-1.0, 1.0])
            print("Calculate optical flow on %d frames ..." % len(arFrames))
            timer.start()
            arFlows = frames2flows(arFrames, bThirdChannel=False, bShow=True)
            print("Optical flow per frame: %.3f" %
                  (timer.stop() / len(arFrames)))

            # predict video from flows; the model takes a batch, so add a
            # leading axis and read back the single result
            print("Predict video with %s ..." % (keI3D.name))
            arX = np.expand_dims(arFlows, axis=0)
            arProbas = keI3D.predict(arX, verbose=1)[0]
            nLabel, sLabel, fProba = probability2label(arProbas,
                                                       oClasses,
                                                       nTop=3)

            sResults = "Sign: %s (%.0f%%)" % (sLabel, fProba * 100.)
            print(sResults)
    finally:
        # always free the camera and close windows, even on error
        oStream.release()
        cv2.destroyAllWindows()

    return
Example #5
0
def image_normalize(rgbFrames, nFrames):
    """Normalize a frame sequence to ``nFrames`` frames and rescale it.

    Args:
        rgbFrames: Frame sequence accepted by ``frames_downsample``.
        nFrames: Target number of frames, forwarded to ``frames_downsample``.

    Returns:
        The result of ``images_rescale`` applied to the resampled frames.
    """
    # Honor the requested length; previously 40 was hard-coded and nFrames
    # was silently ignored.
    rgbFrames = frames_downsample(rgbFrames, nFrames)
    rgbFrames = images_rescale(rgbFrames)
    return rgbFrames
Example #6
0
def sent_preds(rgbs,oflows,frames_count,labels,lstmModel,rgb_model,oflow_model,
    nTop,frames_to_process=30,stride=10,threshold=30):
    """Predict a sequence of signs by sliding a window over a long clip.

    Windows are ``frames_to_process`` frames long and consecutive windows
    overlap by ``stride`` frames (each window starts
    ``frames_to_process - stride`` frames after the previous one). Every
    window is passed through ``frames_downsample(..., 40)`` and classified.
    A window's prediction is kept only if its first score exceeds
    ``threshold``. If an already kept prediction has the same top label, the
    one with the higher mean score wins. The kept predictions are finally
    merged rank by rank.

    NOTE(review): the window only advances when ``stride`` is smaller than
    ``frames_to_process``; other values are not guarded against.

    Args:
        rgbs: Sliceable sequence of RGB frames (list or array).
        oflows: Sliceable sequence of optical-flow frames.
        frames_count: Unused; kept for interface compatibility.
        labels: Forwarded to the prediction function.
        lstmModel: If not ``None``, ``i3d_LSTM_prediction`` is used;
            otherwise ``get_predicts``.
        rgb_model: Forwarded to the prediction function.
        oflow_model: Forwarded to the prediction function.
        nTop: Forwarded to the prediction function.
        frames_to_process: Window length in frames.
        stride: Number of frames shared by consecutive windows.
        threshold: Minimum top score for a window's prediction to be kept.

    Returns:
        A list of single-entry dicts, one per rank. Each key is the kept
        labels of that rank joined with ``"-"`` and each value is the summed
        score divided by the number of kept predictions. If nothing was kept,
        three ``{'Unknown': 0.}`` entries are returned.
    """
    def _flatten(prediction):
        # [{label: score}, ...] -> [(label, score), ...]
        return [pair for entry in prediction for pair in entry.items()]

    def _mean(values):
        return sum(values) / len(values)

    def _combine(kept):
        # Merge the kept predictions rank by rank. At least three slots are
        # produced (the historical output width); more are added when a
        # prediction carries more entries, instead of raising IndexError.
        width = max(3, max(len(result) for result in kept))
        joined_keys = [""] * width
        summed_vals = [0] * width
        for result in kept:
            for idx, entry in enumerate(result):
                for label, score in entry.items():
                    joined_keys[idx] += label + "-"
                    summed_vals[idx] += score
        return [{k.rsplit("-", 1)[0]: v / len(kept)}
                for k, v in zip(joined_keys, summed_vals)]

    step = frames_to_process - stride
    results = []
    pos = 0
    next_start = 0

    # Continue while there is at least one frame after the previous window.
    # len() instead of .shape lets plain lists through as well as arrays.
    while len(rgbs[next_start:next_start + stride]) != 0:
        start = pos * step
        end = start + frames_to_process

        rgbs_p = rgbs[start:end]
        oflows_p = oflows[start:end]
        rgbs_p = frames_downsample(np.array(rgbs_p), 40)
        oflows_p = frames_downsample(np.array(oflows_p), 40)

        if lstmModel is not None:
            predictions = i3d_LSTM_prediction(rgbs_p, oflows_p, labels, lstmModel, rgb_model, oflow_model, nTop)
        else:
            predictions = get_predicts(rgbs_p, oflows_p, labels, oflow_model, rgb_model, nTop, False)

        flat = _flatten(predictions)

        # An empty prediction is ignored rather than crashing on flat[0].
        if flat and flat[0][1] > threshold:
            top_label = str(flat[0][0]).strip()
            scores = [score for _, score in flat]

            if not results:
                results.append(predictions)
            else:
                replacements = []
                matched = False
                # Iterate over a snapshot: results is modified inside the
                # loop, and mutating a list while iterating it skips items.
                for result in list(results):
                    kept_flat = _flatten(result)
                    if str(kept_flat[0][0]).strip() != top_label:
                        continue
                    matched = True
                    kept_scores = [score for _, score in kept_flat]
                    if _mean(scores) > _mean(kept_scores):
                        # The new window is more confident: replace the old
                        # entry (the replacement is appended at the end).
                        results.remove(result)
                        replacements.append(predictions)

                if not matched:
                    replacements.append(predictions)
                results.extend(replacements)

        next_start = end
        pos += 1

    if results:
        predictions = _combine(results)
    else:
        predictions = [{'Unknown': 0.}, {'Unknown': 0.}, {'Unknown': 0.}]

    return predictions
Example #7
0
def live():
    """Record one gesture from the webcam, classify it and display the result.

    Relies on module-level names defined elsewhere: ``gameDisplay``,
    ``carImg``, ``diVideoSet``, ``h``, ``w``, ``keI3D``, ``oClasses`` and
    ``message_display``. The live video is shown until <blank> is pressed;
    one clip is then recorded and classified, and the loop ends. The result
    string is passed to ``message_display``. The stream and all OpenCV
    windows are released on exit, including when an exception is raised.
    """
    gameDisplay.blit(carImg, (0, 0))

    timer = Timer()
    sResults = ""

    # open a pointer to the webcam video stream
    oStream = video_start(device=1,
                          tuResolution=(320, 240),
                          nFramePerSecond=diVideoSet["nFpsAvg"])

    try:
        while True:
            # show live video and wait for key stroke
            key = video_show(oStream,
                             "green",
                             "Press <blank> to start",
                             sResults,
                             tuRectangle=(h, w))

            # keep showing the live video until <blank> is pressed
            if key != ord(' '):
                continue

            # countdown n sec
            video_show(oStream,
                       "orange",
                       "Recording starts in ",
                       tuRectangle=(h, w),
                       nCountdown=3)

            # record video for n sec
            fElapsed, arFrames, _ = video_capture(
                oStream, "red", "Recording ",
                tuRectangle=(h, w),
                nTimeDuration=int(diVideoSet["fDurationAvg"]),
                bOpticalFlow=False)
            print("\nCaptured video: %.1f sec, %s, %.1f fps" %
                  (fElapsed, str(arFrames.shape), len(arFrames) / fElapsed))

            # show orange wait box
            frame_show(oStream,
                       "orange",
                       "Translating sign ...",
                       tuRectangle=(h, w))

            # crop and downsample frames
            arFrames = images_crop(arFrames, h, w)
            arFrames = frames_downsample(arFrames, diVideoSet["nFramesNorm"])

            # Translate frames to flows (original author's note: these are
            # already scaled between [-1.0, 1.0])
            print("Calculate optical flow on %d frames ..." % len(arFrames))
            timer.start()
            arFlows = frames2flows(arFrames, bThirdChannel=False, bShow=True)
            print("Optical flow per frame: %.3f" %
                  (timer.stop() / len(arFrames)))

            # predict video from flows; the model takes a batch, so add a
            # leading axis and read back the single result
            print("Predict video with %s ..." % (keI3D.name))
            arX = np.expand_dims(arFlows, axis=0)
            arProbas = keI3D.predict(arX, verbose=1)[0]
            nLabel, sLabel, fProba = probability2label(arProbas,
                                                       oClasses,
                                                       nTop=3)

            sResults = "Sign: %s (%.0f%%)" % (sLabel, fProba * 100.)
            print(sResults)

            # a single recognition per call: leave the loop after the first one
            break

        message_display(sResults)
    finally:
        # always free the camera and close windows, even on error
        oStream.release()
        cv2.destroyAllWindows()