import os

import numpy as np
import cv2

# Helper modules from the surrounding project; module paths other than
# model_i3d and datagenerator are assumed from the function names used below.
from timer import Timer
from frame import images_crop, frames_downsample
from videocapture import video_start, frame_show, video_show, video_capture
from opticalflow import frames2flows
from datagenerator import VideoClasses
from model_i3d import I3D_load
from predict import probability2label


def livedemo():

    # dataset
    diVideoSet = {
        "sName": "chalearn",
        "nClasses": 20,         # number of classes
        "nFramesNorm": 40,      # number of frames per video
        "nMinDim": 240,         # smaller dimension of saved video-frames
        "tuShape": (240, 320),  # height, width
        "nFpsAvg": 10,
        "nFramesAvg": 50,
        "fDurationAvg": 5.0}    # seconds

    # files
    sClassFile = "data-set/%s/%03d/class.csv" % (diVideoSet["sName"], diVideoSet["nClasses"])
    sVideoDir = "data-set/%s/%03d" % (diVideoSet["sName"], diVideoSet["nClasses"])

    print("\nStarting gesture recognition live demo ... ")
    print(os.getcwd())
    print(diVideoSet)

    # load label description
    oClasses = VideoClasses(sClassFile)

    sModelFile = "model/20180627-0729-chalearn020-oflow-i3d-entire-best.h5"
    h, w = 224, 224
    keI3D = I3D_load(sModelFile, diVideoSet["nFramesNorm"], (h, w, 2), oClasses.nClasses)

    # open a pointer to the webcam video stream
    oStream = video_start(device=1, tuResolution=(320, 240),
                          nFramePerSecond=diVideoSet["nFpsAvg"])

    #liVideosDebug = glob.glob(sVideoDir + "/train/*/*.*")
    nCount = 0
    sResults = ""
    timer = Timer()

    # loop over action states
    while True:
        # show live video and wait for key stroke
        key = video_show(oStream, "green", "Press <blank> to start", sResults,
                         tuRectangle=(h, w))

        # start!
        if key == ord(' '):
            # countdown n sec
            video_show(oStream, "orange", "Recording starts in ",
                       tuRectangle=(h, w), nCountdown=3)

            # record video for n sec
            fElapsed, arFrames, _ = video_capture(
                oStream, "red", "Recording ", tuRectangle=(h, w),
                nTimeDuration=int(diVideoSet["fDurationAvg"]), bOpticalFlow=False)
            print("\nCaptured video: %.1f sec, %s, %.1f fps" %
                  (fElapsed, str(arFrames.shape), len(arFrames) / fElapsed))

            # show orange wait box
            frame_show(oStream, "orange", "Translating sign ...", tuRectangle=(h, w))

            # crop and downsample frames
            arFrames = images_crop(arFrames, h, w)
            arFrames = frames_downsample(arFrames, diVideoSet["nFramesNorm"])

            # translate frames to flows - these are already scaled between [-1.0, 1.0]
            print("Calculate optical flow on %d frames ..." % len(arFrames))
            timer.start()
            arFlows = frames2flows(arFrames, bThirdChannel=False, bShow=True)
            print("Optical flow per frame: %.3f" % (timer.stop() / len(arFrames)))

            # predict video from flows
            print("Predict video with %s ..." % keI3D.name)
            arX = np.expand_dims(arFlows, axis=0)
            arProbas = keI3D.predict(arX, verbose=1)[0]
            nLabel, sLabel, fProba = probability2label(arProbas, oClasses, nTop=3)
            sResults = "Sign: %s (%.0f%%)" % (sLabel, fProba * 100.)
            print(sResults)

            nCount += 1

        # quit
        elif key == ord('q'):
            break

    # do a bit of cleanup
    oStream.release()
    cv2.destroyAllWindows()

    return
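
# Standard script entry point (not shown in the original snippet, so this is
# an assumption about how the demo is launched):
if __name__ == '__main__':
    livedemo()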
from keras import backend as K
from model_i3d import I3D_load, Inception_Inflated3d, add_i3d_top
from datagenerator import VideoClasses, FramesGenerator, generate_generator_multiple
import json

#==== model frame number
frames_num = 115

#==== model input type
#sModelFile = "model_flow_mirror/20200706-0517-tsl100-oflow-i3d-entire-best.h5"
#sModelFile = "model_rgb_mirror/20200711-0410-tsl100-115-oflow-i3d-entire-best.h5"
sModelFile = "model_combined_mirror/115_rgb.h5"

#==== model load
h, w = 224, 224
keI3D = I3D_load(sModelFile, frames_num, (h, w, 2), 63)
#keI3D = I3D_load(sModelFile, frames_num, (h, w, 3), 63)  # RGB variant: 3 channels

input_type = 'combined_test'

sFolder = "%03d-%d" % (100, frames_num)
sOflowDir = "data-temp/%s/%s/oflow" % ('tsl', sFolder)
sImageDir = "data-temp/%s/%s/image" % ('tsl', sFolder)

genFramesTest_flow = FramesGenerator(sOflowDir + "/test_videos", 1, frames_num,
                                     224, 224, 2, bShuffle=False)
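
# A minimal evaluation sketch (not from the original script): assuming
# FramesGenerator implements the keras.utils.Sequence protocol and the .h5
# checkpoint was saved with its optimizer state (so load_model restores the
# compiled loss and accuracy metric), the flow test set can be scored directly.
# generate_generator_multiple, imported above, presumably zips an RGB and a
# flow generator for the 'combined' input type; that wiring is not shown here.
fLoss, fAcc = keI3D.evaluate_generator(genFramesTest_flow, verbose=1)
print("Flow test set: loss %.3f, accuracy %.1f%%" % (fLoss, fAcc * 100.))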
def livedemo():

    fDurationAvg = 3.0  # seconds

    # files
    sClassFile = "class_ISL.csv"

    print("\nStarting gesture recognition live demo ... ")

    # load label description
    oClasses = VideoClasses(sClassFile)

    sModelFile = "model/20190322-1841-ISL105-oflow-i3d-top-best.h5"
    h, w = 224, 224
    keI3D = I3D_load(sModelFile, 40, (h, w, 2), oClasses.nClasses)
    if keI3D:
        print("Model loaded successfully")

    # open a pointer to the webcam video stream
    oStream = video_start(device=0, tuResolution=(320, 240), nFramePerSecond=10)

    nCount = 0
    sResults = ""
    timer = Timer()

    # loop over action states
    while True:
        # show live video and wait for key stroke ('3'/'5' record, 'q' quits)
        key = video_show(oStream, "green", "Press key to start", sResults,
                         tuRectangle=(h, w))

        # start!
        if key == ord('3') or key == ord('5'):
            # countdown n sec
            video_show(oStream, "orange", "Recording starts in ",
                       tuRectangle=(h, w), nCountdown=3)

            # record video for 3 or 5 sec, depending on the key pressed
            fDurationAvg = 3 if key == ord('3') else 5
            fElapsed, arFrames, _ = video_capture(
                oStream, "red", "Recording ",
                tuRectangle=(h, w), nTimeDuration=int(fDurationAvg),
                bOpticalFlow=False)
            print("\nCaptured video: %.1f sec, %s, %.1f fps" %
                  (fElapsed, str(arFrames.shape), len(arFrames) / fElapsed))

            # show orange wait box
            frame_show(oStream, "orange", "Translating sign ...", tuRectangle=(h, w))

            # crop and downsample frames
            arFrames = images_crop(arFrames, h, w)
            arFrames = frames_downsample(arFrames, 40)

            # translate frames to flows - these are already scaled between [-1.0, 1.0]
            print("Calculate optical flow on %d frames ..." % len(arFrames))
            timer.start()
            arFlows = frames2flows(arFrames, bThirdChannel=False, bShow=True)
            print("Optical flow per frame: %.3f" % (timer.stop() / len(arFrames)))

            # predict video from flows
            print("Predict video with %s ..." % keI3D.name)
            arX = np.expand_dims(arFlows, axis=0)
            arProbas = keI3D.predict(arX, verbose=1)[0]
            nLabel, sLabel, fProba = probability2label(arProbas, oClasses, nTop=3)
            sResults = "Sign: %s (%.0f%%)" % (sLabel, fProba * 100.)
            print(sResults)

            nCount += 1

        # quit
        elif key == ord('q'):
            break

    oStream.release()
    cv2.destroyAllWindows()

    return
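
# Hypothetical sketch of probability2label, which both livedemo() variants
# call but which is defined elsewhere in the project: rank the class
# probabilities and return the best label. The class-description lookup on
# oClasses is an assumption, as is printing the nTop candidates.
def probability2label(arProbas, oClasses, nTop=3):
    arTopLabels = arProbas.argsort()[-nTop:][::-1]  # indices of the nTop most probable classes
    nLabel = int(arTopLabels[0])
    sLabel = oClasses.dfClass.sDetail[nLabel]       # assumed class-description lookup
    return nLabel, sLabel, float(arProbas[nLabel])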
"fDurationAvg": 5.0 } # seconds # files sClassFile = "class.csv" print("\nStarting gesture recognition live demo ... ") print(os.getcwd()) print(diVideoSet) # load label description oClasses = VideoClasses(sClassFile) sModelFile = "epochs_001-val_acc_0.980.hdf5" h, w = 224, 224 keI3D = I3D_load(sModelFile, diVideoSet["nFramesNorm"], (h, w, 2), oClasses.nClasses) def live(): gameDisplay.blit(carImg, (0, 0)) # open a pointer to the webcam video stream oStream = video_start(device=1, tuResolution=(320, 240), nFramePerSecond=diVideoSet["nFpsAvg"]) timer = Timer() sResults = "" nCount = 0 while True: # show live video and wait for key stroke