def process_video(weight_path, video_path):
    """Split a video into command segments and run prediction on each one.

    Parameters
    ----------
    weight_path : path to the trained model weights, forwarded to
        ``predict_videos``.
    video_path : path to a video file, or (if not a regular file) a
        directory of frames.

    Returns
    -------
    (ans_v, ans_r) : parallel lists with, per segment, the video object and
        the recognition result returned by ``predict_videos``.
    """
    # Fixed: original used Python 2 `print` statements, which are a syntax
    # error under Python 3 and inconsistent with the other blocks here.
    print("\nLoading data from disk...")
    video = Video(vtype='face', face_predictor_path=FACE_PREDICTOR_PATH)
    # A regular file is loaded as a video; anything else is assumed to be
    # a directory of pre-extracted frames.
    if os.path.isfile(video_path):
        video.from_video(video_path)
    else:
        video.from_frames(video_path)
    print("Data loaded.\n")

    a = video.split_commands()
    show_square(video.sq[20:], video.avg_sq)

    ans_v = []
    ans_r = []
    if a != []:
        for i in range(len(a)):
            if i == 0:
                # Leading segment: start of the video up to the first split.
                video.from_video_test(video_path, 0, a[i])
                v, r = predict_videos(video, weight_path)
                ans_v.append(v)
                ans_r.append(r)
            if i == len(a) - 1:
                # Trailing segment: last split to the end of the video.
                video.from_video_test(video_path, a[i], -1, last=True)
                v, r = predict_videos(video, weight_path)
                ans_v.append(v)
                ans_r.append(r)
                break
            # Interior segment between consecutive splits.  Deliberately
            # also reached when i == 0, so that iteration covers
            # [a[0], a[1]] in addition to the leading segment.
            video.from_video_test(video_path, a[i], a[i + 1])
            v, r = predict_videos(video, weight_path)
            ans_v.append(v)
            ans_r.append(r)
    return ans_v, ans_r
def predict(weight_path, video_path, absolute_max_string_len=32, output_size=28):
    """Run LipNet inference on a single video and decode the transcript.

    Parameters
    ----------
    weight_path : path to trained LipNet weights.
    video_path : video file, or a directory of frames if not a regular file.
    absolute_max_string_len : maximum label length the model was built for.
    output_size : size of the model's output alphabet.

    Returns
    -------
    (video, result) : the loaded ``Video`` object and the decoded string.
    """
    print ("\nLoading data from disk...")
    video = Video(vtype='face', face_predictor_path=FACE_PREDICTOR_PATH)
    # Load either from a single video file or from a frame directory.
    source_is_file = os.path.isfile(video_path)
    if source_is_file:
        video.from_video(video_path)
    else:
        video.from_frames(video_path)
    print ("Data loaded.\n")

    # Unpack the tensor shape according to the backend's channel ordering.
    shape = video.data.shape
    if K.image_data_format() == 'channels_first':
        img_c, frames_n, img_w, img_h = shape
    else:
        frames_n, img_w, img_h, img_c = shape

    # Build and compile the network, then restore the trained weights.
    net = LipNet(img_c=img_c, img_w=img_w, img_h=img_h, frames_n=frames_n,
                 absolute_max_string_len=absolute_max_string_len,
                 output_size=output_size)
    optimizer = Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
    # CTC loss is computed inside the model graph; the loss fn just
    # passes the precomputed value through.
    net.model.compile(loss={'ctc': lambda y_true, y_pred: y_pred},
                      optimizer=optimizer)
    net.model.load_weights(weight_path)

    # Decoder with spell-correction post-processing.
    spell = Spell(path=PREDICT_DICTIONARY)
    decoder = Decoder(greedy=PREDICT_GREEDY,
                      beam_width=PREDICT_BEAM_WIDTH,
                      postprocessors=[labels_to_text, spell.sentence])

    # Normalise pixel values to [0, 1] and add a batch dimension.
    batch = np.array([video.data]).astype(np.float32) / 255
    seq_lengths = np.array([len(video.data)])
    predictions = net.predict(batch)
    result = decoder.decode(predictions, seq_lengths)[0]
    return (video, result)
def load(video_path):
    """Load a ``Video`` from a video file or a directory of frames.

    Parameters
    ----------
    video_path : path to a video file, or (if not a regular file) a
        directory of pre-extracted frames.

    Returns
    -------
    The populated ``Video`` instance.
    """
    # Fixed: original used Python 2 `print` statements (invalid in Py3).
    print("\n[{}]\nLoading data from disk...".format(video_path))
    video = Video(vtype='face', face_predictor_path=FACE_PREDICTOR_PATH)
    if os.path.isfile(video_path):
        video.from_video(video_path)
    else:
        video.from_frames(video_path)
    print("Data loaded.\n")
    return video
def predict(video_path):
    """Load a video, split it into commands, and print mouth-square stats.

    Despite the name this variant performs no network inference; it only
    computes and prints dispersion/average statistics over ``video.sq``
    (per-frame mouth-square values) and shows a plot via ``show_square``.

    Parameters
    ----------
    video_path : path to a video file, or (if not a regular file) a
        directory of frames.

    Returns
    -------
    None (results are printed / plotted).
    """
    # Fixed: original used Python 2 `print` statements (invalid in Py3).
    print("\nLoading data from disk...")
    video = Video(vtype='face', face_predictor_path=FACE_PREDICTOR_PATH)
    if os.path.isfile(video_path):
        video.from_video(video_path)
    else:
        video.from_frames(video_path)
    print("Data loaded.\n")

    a = video.split_commands()

    # Average of the sliding-window dispersions.
    # (printed labels kept verbatim, typos included, to preserve output)
    d = sum(video.avg_sq) / len(video.avg_sq)
    print('Avarage dispertion(slide) = ', d)

    # Standard deviation over all square values:
    # sqrt((sum(x^2) - (sum(x))^2 / n) / n)  ==  population std dev.
    n = len(video.sq)
    total = sum(video.sq)
    total_sq = sum(x ** 2 for x in video.sq)
    disp = np.sqrt((total_sq - (total * total) / n) / n)
    print('disp = ', disp)

    # Plain mean of the square values.
    avg = total / n
    print('Avarage square = ', avg)

    show_square(video.sq[20:], video.avg_sq)
def predict(weight_path, video_path, absolute_max_string_len=32, output_size=28):
    """Run LipNet inference using a cached prebuilt model, then round-trip
    the decoded text through cognitive speech services.

    The compiled model is built once and cached on ``MODEL.model``; later
    calls reuse it and skip the expensive build/weight-load step.

    Parameters
    ----------
    weight_path : path to trained LipNet weights (used only on first build).
    video_path : video file, or a directory of frames if not a regular file.
    absolute_max_string_len : maximum label length the model was built for.
    output_size : size of the model's output alphabet.

    Returns
    -------
    (video, cog_result) : the loaded ``Video`` and the text after the
        text-to-speech / speech-to-text round trip.
    """
    video = Video(vtype='face', face_predictor_path=FACE_PREDICTOR_PATH)
    if os.path.isfile(video_path):
        video.from_video(video_path)
    else:
        video.from_frames(video_path)

    if not MODEL.model:
        # Unpack the tensor shape according to the backend channel order.
        if K.image_data_format() == 'channels_first':
            img_c, frames_n, img_w, img_h = video.data.shape
        else:
            frames_n, img_w, img_h, img_c = video.data.shape
        # Fixed: the LipNet instance was previously constructed on every
        # call even though it is only needed when the cache is cold;
        # build it here, inside the cold-cache branch only.
        lipnet = LipNet(img_c=img_c, img_w=img_w, img_h=img_h,
                        frames_n=frames_n,
                        absolute_max_string_len=absolute_max_string_len,
                        output_size=output_size)
        MODEL.model = Prebuilt_model(weight_path, video_path, lipnet,
                                     absolute_max_string_len, output_size)

    # Normalise pixels to [0, 1], add a batch dimension, and decode.
    X_data = np.array([video.data]).astype(np.float32) / 255
    input_length = np.array([len(video.data)])
    y_pred = MODEL.model.lipnet.predict(X_data)
    results = MODEL.model.decoder.decode(y_pred, input_length)
    print("Before cognitive services: " + results[0])

    # Round-trip the transcript through TTS then STT as a normaliser.
    cog = cognitive()
    cog_result = cog.speech_to_text(cog.text_to_speech(results[0]))
    print("after cognitive services: " + cog_result)
    return (video, cog_result)