def PredictGender1(vid, start, end, hog_svm_gen):
    """Predict gender by majority voting over HOG predictions on random frames."""
    print("Predict Gender")
    predicted_labels_hog_gen = []
    # get random frames from the segment
    # keep only the face of each frame
    # extract HOG features and predict a label per frame
    # majority-vote over the per-frame labels
    fps = vid.get(cv2.CAP_PROP_FPS)
    frameCount = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))
    if fps > 0:
        duration = end - start  # in seconds
    else:
        print("can't render video")
        return -1
    FramestoCapturePS = 1  # alternatively fps / 3: frames to capture per second
    NumFrames = FramestoCapturePS * duration  # frames to sample from the whole segment
    counter = 0
    if NumFrames > 30:
        NumFrames = 30
    print(NumFrames)
    for f in range(int(NumFrames)):
        while True:  # retry until the captured frame contains a face
            counter = counter + 1
            currentFrameTime = random.randint(int(fps * start), int(fps * end))
            vid.set(cv2.CAP_PROP_POS_FRAMES, currentFrameTime)  # seek to the chosen frame
            ret, frame1 = vid.read()  # ret indicates whether the read succeeded (e.g. False at end of video)
            if not ret:  # ensure the frame was read correctly
                print(str(frameCount) + " err1")
                continue
            face1, img1, x, y = facedetecion.detect(frame1)  # keep only the face region
            if counter == 100:
                break
            if face1 is not None:  # if no face was found, sample another frame
                break
        if counter == 100:
            print("SKIP******************* Can't find a face")
            break
        hog_feature = get_HOG.getHOGfeatures128(face1)
        predicted_labels_hog_gen.append(
            int(hog_svm_gen.predict(hog_feature.reshape(1, -1))[1].ravel()[0]))
    # do majority voting over the per-frame predictions
    predicted_labels_counter = collections.Counter(predicted_labels_hog_gen)
    try:
        final_label = predicted_labels_counter.most_common(1)[0][0]
        return final_label
    except IndexError:  # no frame with a face was ever captured
        print("no faces")
        return -1
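# A minimal sketch of the majority-voting step used above, factored into a
# helper. `majority_vote` is a hypothetical name, not part of the original
# code; it just wraps collections.Counter.most_common with the same -1
# fallback the predictors use.
def majority_vote(predicted_labels):
    """Return the most common label, or -1 when no predictions were made."""
    label_counter = collections.Counter(predicted_labels)
    try:
        return label_counter.most_common(1)[0][0]
    except IndexError:  # empty prediction list: no faces were found
        return -1

# Example: majority_vote([1, 0, 1, 1, 0]) == 1, majority_vote([]) == -1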
def PredictGender(vid, start, end, hog_svm_gen):
    """Predict gender from a single random frame that contains a face."""
    print("Predict Gender")
    fps = vid.get(cv2.CAP_PROP_FPS)
    frameCount = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))
    if fps <= 0:
        print("can't render video")
        return -1
    counter = 0
    while True:  # retry until the captured frame contains a face
        counter = counter + 1
        currentFrameTime = random.randint(int(fps * start), int(fps * end))
        vid.set(cv2.CAP_PROP_POS_FRAMES, currentFrameTime)  # seek to the chosen frame
        ret, frame = vid.read()  # ret indicates whether the read succeeded (e.g. False at end of video)
        if not ret:  # ensure the frame was read correctly
            print(str(frameCount) + " err1")
            continue
        face, img, x, y = facedetecion.detect(frame)  # keep only the face region
        if counter > 100:
            print("can't find faces in video")
            return -1
        if face is not None:  # if no face was found, sample another frame
            break
    hog_feature = get_HOG.getHOGfeatures128(face)
    final_label = int(
        hog_svm_gen.predict(hog_feature.reshape(1, -1))[1].ravel()[0])
    return final_label
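# A minimal usage sketch for the gender predictors above, assuming a gender
# SVM was trained and saved with the same cv2.ml API used for the emotion
# models. The file and video names are hypothetical:
#
#   vid = cv2.VideoCapture('some_video.mp4')        # hypothetical input video
#   hog_svm_gen = cv2.ml.SVM_load('GENDERsvm.xml')  # hypothetical model file
#   gender = PredictGender(vid, start=0, end=5, hog_svm_gen=hog_svm_gen)
#   vid.release()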
def write_labels(vid, emo_vote, gender_vote, start, end):
    """Overlay the voted emotion/gender label on each frame of the segment."""
    print("write_labels")
    font = cv.FONT_HERSHEY_SIMPLEX
    fps = vid.get(cv.CAP_PROP_FPS)
    frame_count = int(vid.get(cv.CAP_PROP_FRAME_COUNT))
    start_frame = int(start * fps)
    end_frame = int(end * fps)
    # Uncomment to also write the annotated frames to a video file:
    # height = vid.get(cv.CAP_PROP_FRAME_HEIGHT)
    # width = vid.get(cv.CAP_PROP_FRAME_WIDTH)
    # fourcc = vid.get(cv.CAP_PROP_FOURCC)  # or cv.VideoWriter_fourcc(*'DIVX')
    # out = cv.VideoWriter("../output/" + 'output.mp4', fourcc, int(fps),
    #                      (int(width), int(height)))
    label_string = ""
    if emo_vote == 0:
        label_string = "Angry"
    elif emo_vote == 1:
        label_string = "Disgusted"
    elif emo_vote == 2:
        label_string = "Afraid"
    elif emo_vote == 3:
        label_string = "Happy"
    elif emo_vote == 4:
        label_string = "Neutral"
    elif emo_vote == 5:
        label_string = "Sad"
    elif emo_vote == 6:
        label_string = "Surprised"
    if gender_vote == 0:
        label_string += " Man"
    elif gender_vote == 1:
        label_string += " Woman"
    # loop over the segment's frames and draw the label on each one
    vid.set(cv.CAP_PROP_POS_FRAMES, start_frame)  # seek to the segment start
    for f in range(start_frame, end_frame):
        ret, frame = vid.read()  # ret indicates whether the read succeeded (e.g. False at end of video)
        if not ret:  # ensure the frame was read correctly
            print(str(frame_count) + " err1")
            continue
        height, width, channels = frame.shape
        if width > 1000:
            frame = cv.resize(frame, (int(width / 3), int(height / 3)))
        face, img, x, y = facedetecion.detect(frame)  # keep only the face region
        # write the label just above the detected face
        n_frame = cv.putText(img, label_string, (x - 50, y - 10), font, 0.8,
                             (0, 0, 255), 2, cv.LINE_AA)
        cv.imshow(label_string, n_frame)
        # out.write(n_frame)
        if cv.waitKey(1) & 0xFF == ord('q'):
            break
    # out.release()
    cv.destroyAllWindows()
    print(label_string)
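# The if/elif chains above can also be written as lookup tables; a sketch of
# an equivalent mapping using the same label indices as write_labels:
EMOTION_NAMES = {0: "Angry", 1: "Disgusted", 2: "Afraid", 3: "Happy",
                 4: "Neutral", 5: "Sad", 6: "Surprised"}
GENDER_NAMES = {0: "Man", 1: "Woman"}
# e.g. EMOTION_NAMES.get(emo_vote, "") + " " + GENDER_NAMES.get(gender_vote, "")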
def Train(file, target_dir):
    """Extract HOG/HOF features from the training videos and train two SVMs."""
    print("here")
    labels = []
    hogDescriptors = []
    hofDescriptors = []
    print('start reading Data')
    print(datetime.datetime.now())
    # fill features and labels
    with open(file) as f:
        next(f)  # skip the CSV header
        for l in f:
            l = l.strip()
            if len(l) > 0:
                link, start, end, video, utterance, arousal, valence, Emotion = l.split(',')[:8]
                video_dir = os.path.join(target_dir, video)
                current_video_path = os.path.abspath(os.path.join(video_dir, utterance))
                if os.path.exists(current_video_path):  # ensure the video exists
                    print(str(current_video_path))
                    # read the video
                    # get random pairs of consecutive frames
                    # keep only the face of each frame
                    # extract HOG & HOF features, append features and label
                    vid = cv2.VideoCapture(current_video_path)
                    # get some video metadata
                    fps = vid.get(cv2.CAP_PROP_FPS)
                    frameCount = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))
                    if fps == 0:
                        vid.release()
                        continue
                    duration = float(frameCount) / float(fps)  # in seconds
                    FramestoCapturePS = 1  # alternatively fps / 10: frame pairs to capture per second
                    NumFrames = int(FramestoCapturePS * duration)
                    counter = 0
                    for i in range(NumFrames):
                        face1 = []
                        face2 = []
                        img1 = []
                        while True:  # retry until both captured frames contain a face
                            counter = counter + 1
                            currentFrameTime = random.randint(0, frameCount - 2)
                            vid.set(cv2.CAP_PROP_POS_FRAMES, currentFrameTime)  # seek to the chosen frame
                            ret, frame1 = vid.read()  # ret indicates whether the read succeeded (e.g. False at end of video)
                            if not ret:  # ensure the frame was read correctly
                                print(str(frameCount) + " err1")
                                continue
                            vid.set(cv2.CAP_PROP_POS_FRAMES, currentFrameTime + 1)
                            ret, frame2 = vid.read()
                            if not ret:
                                print("err2")
                                continue
                            face1, img1 = facedetecion.detect(frame1)  # keep only the face region
                            face2, img2 = facedetecion.detect(frame2)
                            if counter == 100:
                                break
                            # if either frame has no face, sample another pair
                            if face1 is not None and face2 is not None:
                                break
                        if counter == 100:
                            print("SKIP******************* " + str(current_video_path))
                            break
                        # only the first detected face is processed
                        hoffeature = get_HOF.getHOFfeatures(face1, face2)
                        hofDescriptors.append(hoffeature)
                        hogfeature = get_HOG.getHOGfeatures(face1)
                        hogDescriptors.append(hogfeature)
                        labels.append(int(Emotion))
                    vid.release()  # like closing a file: frees software and hardware resources (e.g. a camera)
    # save the features for further use
    np.save('hofDescriptors.npy', np.array(hofDescriptors))
    np.save('hogDescriptors.npy', np.array(hogDescriptors))
    np.save('labels.npy', np.array(labels))
    print('start training HOGsvm')
    print(datetime.datetime.now())
    # train on HOG features
    HOGsvm = cv2.ml.SVM_create()
    HOGsvm.setType(cv2.ml.SVM_C_SVC)
    HOGsvm.setKernel(cv2.ml.SVM_LINEAR)
    HOGsvm.train(np.array(hogDescriptors), cv2.ml.ROW_SAMPLE, np.array(labels))
    HOGsvm.save('HOGsvm.xml')
    print('start training HOFsvm')
    print(datetime.datetime.now())
    # train on HOF features
    HOFsvm = cv2.ml.SVM_create()
    HOFsvm.setType(cv2.ml.SVM_C_SVC)
    HOFsvm.setKernel(cv2.ml.SVM_LINEAR)
    HOFsvm.train(np.array(hofDescriptors), cv2.ml.ROW_SAMPLE, np.array(labels))
    HOFsvm.save('HOFsvm.xml')
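# Note: cv2.ml.SVM expects float32 row samples and int32 labels, so the
# np.array(...) calls above only work if the descriptors already have those
# dtypes. A minimal sketch of an explicit conversion, assuming get_HOG /
# get_HOF return 1-D float feature vectors of a fixed length:
#
#   samples = np.array(hogDescriptors, dtype=np.float32)
#   responses = np.array(labels, dtype=np.int32)
#   HOGsvm.train(samples, cv2.ml.ROW_SAMPLE, responses)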
def predictEmoHOF(vid, start, end, hof_svm_emo):
    """Predict emotion by majority voting over HOF predictions on random frame pairs."""
    print("PredictEmoHOF")
    predicted_labels_hof_emo = []
    # get random pairs of consecutive frames
    # keep only the face of each frame
    # extract HOF features and predict a label per pair
    # majority-vote over the per-pair labels
    fps = vid.get(cv2.CAP_PROP_FPS)
    frameCount = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))
    if fps > 0:
        duration = end - start  # in seconds
    else:
        print("can't render video")
        return -1
    FramestoCapturePS = fps / 3  # frame pairs to capture per second
    NumFrames = FramestoCapturePS * duration
    counter = 0
    if NumFrames > 30:
        NumFrames = 30
    print(NumFrames)
    for f in range(int(NumFrames)):
        while True:  # retry until both captured frames contain a face
            counter = counter + 1
            currentFrameTime = random.randint(int(fps * start), int(fps * end))
            vid.set(cv2.CAP_PROP_POS_FRAMES, currentFrameTime)  # seek to the chosen frame
            ret, frame1 = vid.read()  # ret indicates whether the read succeeded (e.g. False at end of video)
            if not ret:  # ensure the frame was read correctly
                print(str(frameCount) + " err1")
                continue
            vid.set(cv2.CAP_PROP_POS_FRAMES, currentFrameTime + 1)
            ret, frame2 = vid.read()
            if not ret:
                print("err2")
                continue
            face1, img1, x, y = facedetecion.detect(frame1)  # keep only the face region
            face2, img2, x, y = facedetecion.detect(frame2)
            if counter == 100:
                break
            # if either frame has no face, sample another pair
            if face1 is not None and face2 is not None:
                break
        if counter == 100:
            print("SKIP******************* Can't find a face")
            break
        hoffeature = get_HOF.getHOFfeatures(face1, face2)
        predicted_labels_hof_emo.append(
            int(hof_svm_emo.predict(hoffeature.reshape(1, -1))[1].ravel()[0]))
    # do majority voting over the per-pair predictions
    predicted_labels_hof_emocounter = collections.Counter(predicted_labels_hof_emo)
    labelHof = 0
    try:
        labelHof = predicted_labels_hof_emocounter.most_common(1)[0][0]
    except IndexError:  # no frame pair with faces was ever captured
        print("there are no faces")
        return -1
    return labelHof
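# The HOF features above need two consecutive frames (optical flow is computed
# between them). A minimal sketch of the seek-and-read-pair pattern, factored
# into a hypothetical helper that is not part of the original code:
def read_frame_pair(vid, frame_index):
    """Hypothetical helper: return (frame, next_frame), or (None, None) on a failed read."""
    vid.set(cv2.CAP_PROP_POS_FRAMES, frame_index)
    ret1, frame1 = vid.read()
    vid.set(cv2.CAP_PROP_POS_FRAMES, frame_index + 1)
    ret2, frame2 = vid.read()
    if not (ret1 and ret2):
        return None, None
    return frame1, frame2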
def test(file, target_dir):
    """Evaluate the trained HOG/HOF SVMs on the test set and report accuracies."""
    print("here")
    labelsHog = []
    labelsHof = []
    labelsboth = []
    labelsReal = []
    HOGsvm = cv2.ml.SVM_load('HOGsvm.xml')
    HOFsvm = cv2.ml.SVM_load('HOFsvm.xml')
    print("done loading")
    # fill features and labels
    with open(file) as f:
        next(f)  # skip the CSV header
        for l in f:
            l = l.strip()
            if len(l) > 0:
                link, start, end, video, utterance, arousal, valence, Emotion = l.split(',')[:8]
                video_dir = os.path.join(target_dir, video)
                current_video_path = os.path.abspath(os.path.join(video_dir, utterance))
                if os.path.exists(current_video_path):
                    print(str(current_video_path))
                    vid = cv2.VideoCapture(current_video_path)
                    fps = vid.get(cv2.CAP_PROP_FPS)
                    frameCount = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))
                    if fps == 0:
                        vid.release()  # can't render video
                        continue
                    duration = float(frameCount) / float(fps)  # in seconds
                    FramestoCapturePS = 1  # alternatively fps / 3: frame pairs to capture per second
                    NumFrames = FramestoCapturePS * duration
                    counter = 0
                    predictedlabelsHOG = []
                    predictedlabelsHOF = []
                    for i in range(int(NumFrames)):
                        while True:  # retry until both captured frames contain a face
                            counter = counter + 1
                            currentFrameTime = random.randint(0, frameCount - 2)
                            vid.set(cv2.CAP_PROP_POS_FRAMES, currentFrameTime)  # seek to the chosen frame
                            ret, frame1 = vid.read()  # ret indicates whether the read succeeded (e.g. False at end of video)
                            if not ret:  # ensure the frame was read correctly
                                print(str(frameCount) + " err1")
                                continue
                            vid.set(cv2.CAP_PROP_POS_FRAMES, currentFrameTime + 1)
                            ret, frame2 = vid.read()
                            if not ret:
                                print("err2")
                                continue
                            face1, img1 = facedetecion.detect(frame1)  # keep only the face region
                            face2, img2 = facedetecion.detect(frame2)
                            if counter == 100:
                                break
                            # if either frame has no face, sample another pair
                            if face1 is not None and face2 is not None:
                                break
                        if counter == 100:
                            print("SKIP******************* " + str(current_video_path))
                            break
                        hoffeature = get_HOF.getHOFfeatures(face1, face2)
                        hogfeature = get_HOG.getHOGfeatures(face1)
                        predictedlabelsHOF.append(
                            int(HOFsvm.predict(hoffeature.reshape(1, -1))[1].ravel()[0]))
                        predictedlabelsHOG.append(
                            int(HOGsvm.predict(hogfeature.reshape(1, -1))[1].ravel()[0]))
                    vid.release()  # like closing a file: frees software and hardware resources (e.g. a camera)
                    # do majority voting and append the per-video predictions
                    predictedlabelsHOGcounter = collections.Counter(predictedlabelsHOG)
                    predictedlabelsHOFcounter = collections.Counter(predictedlabelsHOF)
                    predictedlabelsBOTHcounter = collections.Counter(
                        predictedlabelsHOF + predictedlabelsHOG)
                    try:
                        labelsHog.append(predictedlabelsHOGcounter.most_common(1)[0][0])
                        labelsHof.append(predictedlabelsHOFcounter.most_common(1)[0][0])
                        labelsboth.append(predictedlabelsBOTHcounter.most_common(1)[0][0])
                        labelsReal.append(int(Emotion))
                    except IndexError:  # no predictions were made for this video
                        print("no predictions for " + str(current_video_path))
                        continue
    a = np.asarray([labelsReal, labelsHog, labelsHof, labelsboth])
    a = a.transpose()
    np.savetxt("results.csv", a, delimiter=",", fmt='%10.5f')
    # compare predicted vs. real labels with zip to compute the accuracy,
    # e.g. zip([5,1,1,2,1], [0,1,1,2,6]) -> [(5,0), (1,1), (1,1), (2,2), (1,6)]
    HogAccuracy = sum(1 for x, y in zip(labelsReal, labelsHog)
                      if x == y) / float(len(labelsReal))
    print("Hog Accuracy: " + str(HogAccuracy))
    HOFAccuracy = sum(1 for x, y in zip(labelsReal, labelsHof)
                      if x == y) / float(len(labelsReal))
    print("HOF Accuracy: " + str(HOFAccuracy))
    BothAccuracy = sum(1 for x, y in zip(labelsReal, labelsboth)
                       if x == y) / float(len(labelsReal))
    print("Hog & HOF Accuracy: " + str(BothAccuracy))
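# A minimal driver sketch tying the pipeline together, assuming CSV files with
# the columns parsed above (link, start, end, video, utterance, arousal,
# valence, Emotion) and directories of videos; all file names are hypothetical:
#
#   if __name__ == "__main__":
#       Train('train_split.csv', '../data/train')
#       test('test_split.csv', '../data/test')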