# --- Query feature extraction over the interval [args.s, args.e] seconds ---
# Depends on names defined elsewhere in the file: filename, cap, frame_rate,
# args, features, ft, wavfile, np, temporal_diff, frame_to_audio,
# colorhist_diff.
audio = filename + '.wav'
fs, wav_data = wavfile.read(audio)

query_features = []
prev_frame = None
prev_colorhist = None
frame_nbr = int(args.s) * frame_rate

# Seek to the start of the query interval (CAP_PROP_POS_MSEC is milliseconds).
cap.set(cv2.CAP_PROP_POS_MSEC, int(args.s) * 1000)
while cap.isOpened() and cap.get(cv2.CAP_PROP_POS_MSEC) < int(args.e) * 1000:
    ret, frame = cap.read()
    # BUGFIX: `frame == None` compares a numpy array element-wise and is
    # unreliable for the end-of-stream test; use an identity check.
    if frame is None:
        break
    # BUGFIX: initialize h so an unmatched feature name cannot raise NameError.
    h = None
    if args.f == features[0]:
        h = ft.colorhist(frame)
    elif args.f == features[1]:
        h = temporal_diff(prev_frame, frame, 10)
    elif args.f == features[2] or args.f == features[3]:
        audio_frame = frame_to_audio(frame_nbr, frame_rate, fs, wav_data)
        if args.f == features[2]:
            h = np.mean(audio_frame ** 2)  # mean audio power of the frame
        elif args.f == features[3]:
            h, mspec, spec = ft.extract_mfcc(audio_frame, fs)
    elif args.f == features[4]:
        colorhist = ft.colorhist(frame)
        h = colorhist_diff(prev_colorhist, colorhist)
        prev_colorhist = colorhist
    # BUGFIX: `h != None` on a numpy array yields an array (ambiguous truth
    # value); use an identity check.
    # NOTE(review): prev_frame and frame_nbr are never advanced in this
    # visible chunk, so the temporal-diff/audio features always see their
    # initial values — confirm updates happen elsewhere.
    if h is not None:
        query_features.append(h)
def process_videos(video_list, indx): total = len(video_list) progress_count = 0 for video in video_list: progress_count += 1 print 'processing: ', video, ' (', progress_count, ' of ', total, ')' cap = cv2.VideoCapture(video) frame_rate = get_frame_rate(video) total_frames = get_frame_count(video) total_audio_frames = get_frame_count_audio(video) # get corresponding audio file filename, fileExtension = os.path.splitext(video) audio = filename + '.wav' fs, wav_data = read(audio) colorhists = [] sum_of_differences = [] audio_powers = [] mfccs = [] colorhist_diffs = [] prev_colorhist = None prev_frame = None frame_nbr = 0 while (cap.isOpened()): ret, frame = cap.read() if frame == None: break audio_frame = frame_to_audio(frame_nbr, frame_rate, fs, wav_data) # check if audio frame is long enough for mfcc transformation if len(audio_frame) >= int(0.01 * fs): power = np.mean(audio_frame**2) audio_powers.append(power) ceps, mspec, spec = ft.extract_mfcc(audio_frame, fs) mfccs.append(ceps) # calculate sum of differences if not prev_frame == None: tdiv = temporal_diff(prev_frame, frame, 10) #diff = np.absolute(prev_frame - frame) #sum = np.sum(diff.flatten()) / (diff.shape[0]*diff.shape[1]*diff.shape[2]) sum_of_differences.append(tdiv) colorhist = ft.colorhist(frame) colorhists.append(colorhist) if not prev_colorhist == None: ch_diff = colorhist_diff(prev_colorhist, colorhist) colorhist_diffs.append(ch_diff) prev_colorhist = colorhist prev_frame = frame frame_nbr += 1 print 'end:', frame_nbr # prepare descriptor for database # mfccs = descr['mfcc'] # Nx13 np array (or however many mfcc coefficients there are) # audio = descr['audio'] # Nx1 np array # colhist = descr['colhist'] # Nx3x256 np array # tempdif = descr['tempdiff'] # Nx1 np array descr = {} descr['mfcc'] = np.array(mfccs) descr['audio'] = np.array(audio_powers) descr['colhist'] = np.array(colorhists) descr['tempdiff'] = np.array(sum_of_differences) descr['chdiff'] = np.array(colorhist_diffs) 
indx.add_to_index(video, descr) print 'added ' + video + ' to database' indx.db_commit()
# --- Read frames; compute color histograms and temporal differences ---
# Depends on names defined elsewhere: cap, q_total, ft, temporal_diff,
# prev_frame, frame_nbr, ch_features, td_features.
frames = []  # matrix for storing the frames

# The active condition scans the whole video (q_total seconds); switch to the
# commented `* 100` variant to look at only the first 10% of the video.
print("Start reading")
while cap.isOpened() and cap.get(cv2.CAP_PROP_POS_MSEC) < q_total * 1000:
    # while cap.isOpened() and cap.get(cv2.CAP_PROP_POS_MSEC) < q_total * 100:
    ret, frame = cap.read()
    # BUGFIX: `frame == None` compares a numpy array element-wise; use an
    # identity check for the end-of-stream sentinel.
    if frame is None:
        break
    h_ch = ft.colorhist(frame)
    h_td = temporal_diff(prev_frame, frame, 10)
    # BUGFIX: `!= None` on histogram arrays yields an array (ambiguous truth
    # value); use identity checks.
    if h_ch is not None and h_td is not None:
        frames.append(frame)
        ch_features.append(h_ch)
        td_features.append(h_td)
    prev_frame = frame
    frame_nbr += 1

# Matrix for storing interesting frames
interesting_frames = []
# Tracks how much the frames have changed since the previous selected frame.
difference = 0
max_frame_skipped = 0
def process_videos(video_list, indx): total = len(video_list) progress_count = 0 for video in video_list: progress_count += 1 print 'processing: ',video, ' (' ,progress_count, ' of ' ,total,')' cap = cv2.VideoCapture(video) frame_rate = get_frame_rate(video) total_frames = get_frame_count(video) total_audio_frames = get_frame_count_audio(video) # get corresponding audio file filename, fileExtension = os.path.splitext(video) audio = filename + '.wav' fs, wav_data = read(audio) colorhists = [] sum_of_differences = [] audio_powers = [] mfccs = [] colorhist_diffs = [] prev_colorhist = None prev_frame = None frame_nbr = 0 while(cap.isOpened()): ret, frame = cap.read() if frame == None: break audio_frame = frame_to_audio(frame_nbr, frame_rate, fs, wav_data) # check if audio frame is long enough for mfcc transformation if len(audio_frame) >= 256: power = np.mean(audio_frame**2) audio_powers.append(power) ceps, mspec, spec = ft.extract_mfcc(audio_frame, fs) mfccs.append(ceps) # calculate sum of differences if not prev_frame == None: tdiv = temporal_diff(prev_frame, frame, 10) #diff = np.absolute(prev_frame - frame) #sum = np.sum(diff.flatten()) / (diff.shape[0]*diff.shape[1]*diff.shape[2]) sum_of_differences.append(tdiv) colorhist = ft.colorhist(frame) colorhists.append(colorhist) if not prev_colorhist == None: ch_diff = colorhist_diff(prev_colorhist, colorhist) colorhist_diffs.append(ch_diff) prev_colorhist = colorhist prev_frame = frame frame_nbr += 1 print 'end:', frame_nbr # prepare descriptor for database # mfccs = descr['mfcc'] # Nx13 np array (or however many mfcc coefficients there are) # audio = descr['audio'] # Nx1 np array # colhist = descr['colhist'] # Nx3x256 np array # tempdif = descr['tempdiff'] # Nx1 np array descr = {} descr['mfcc'] = np.array(mfccs) descr['audio'] = np.array(audio_powers) descr['colhist'] = np.array(colorhists) descr['tempdiff'] = np.array(sum_of_differences) descr['chdiff'] = np.array(colorhist_diffs) indx.add_to_index(video,descr) print 
'added ' + video + ' to database' indx.db_commit()
# --- Per-frame difference metrics with live plotting ---
# Depends on names defined elsewhere: cap, cv2, ft, np, sod, prev_frame,
# ax1, ax2, fig1.
sod2 = []  # mean absolute per-pixel difference (un-thresholded)
cd = []    # per-frame color-histogram differences
while cap.isOpened():
    retVal, frame = cap.read()
    # BUGFIX: idiomatic truth test instead of `== False`.
    if not retVal:
        break

    # == Do your processing here ==
    # NOTE(review): `gray` is unused in this visible chunk — confirm it is
    # needed, otherwise drop the conversion.
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    frame = cv2.blur(frame, (11, 11))
    # NOTE(review): prev_frame is never reassigned in this visible chunk, so
    # this branch depends on an update performed elsewhere — confirm.
    if prev_frame is not None:
        prev_hist = ft.colorhist(prev_frame)
        hist = ft.colorhist(frame)
        cd.append(np.abs(prev_hist - hist))

        # BUGFIX: the original cast uint8 frames to int8, wrapping pixel
        # values > 127 and corrupting the difference; int16 holds the full
        # -255..255 range. Confirm downstream consumers expect this scale.
        diff = np.abs(prev_frame.astype('int16') - frame.astype('int16'))
        normalized_diff = diff / 255.0
        t = 0.2  # threshold on normalized per-pixel change
        thresholded_diff = (normalized_diff > t) * normalized_diff
        sod2.append(np.sum(normalized_diff / np.prod(diff.shape)))
        sod.append(np.sum(thresholded_diff) / np.prod(diff.shape))

        # Refresh the live plots (cd[-1] only exists inside this branch).
        ax1.cla()
        ax2.cla()
        ax1.plot(sod)
        ax1.plot(sod2)
        ax2.plot(cd[-1])
        fig1.canvas.draw()
# --- Query feature extraction over the interval [args.s, args.e] seconds ---
# Depends on names defined elsewhere: args, cap, frame_rate, features, ft,
# wavfile, np, temporal_diff, frame_to_audio, colorhist_diff, query_features.
filename, fileExtension = os.path.splitext(args.query)
audio = filename + '.wav'
fs, wav_data = wavfile.read(audio)

query_features = []
prev_frame = None
prev_colorhist = None
frame_nbr = int(args.s) * frame_rate

# Seek to the start of the query interval (CAP_PROP_POS_MSEC is milliseconds).
cap.set(cv2.CAP_PROP_POS_MSEC, int(args.s) * 1000)
while cap.isOpened() and cap.get(cv2.CAP_PROP_POS_MSEC) < int(args.e) * 1000:
    ret, frame = cap.read()
    # BUGFIX: `frame == None` compares a numpy array element-wise and is
    # unreliable for the end-of-stream test; use an identity check.
    if frame is None:
        break
    # BUGFIX: initialize h so an unmatched feature name cannot raise NameError.
    h = None
    if args.f == features[0]:
        h = ft.colorhist(frame)
    elif args.f == features[1]:
        h = temporal_diff(prev_frame, frame, 10)
    elif args.f == features[2] or args.f == features[3]:
        audio_frame = frame_to_audio(frame_nbr, frame_rate, fs, wav_data)
        if args.f == features[2]:
            h = np.mean(audio_frame ** 2)  # mean audio power of the frame
        elif args.f == features[3]:
            h, mspec, spec = ft.extract_mfcc(audio_frame, fs)
    elif args.f == features[4]:
        colorhist = ft.colorhist(frame)
        h = colorhist_diff(prev_colorhist, colorhist)
        prev_colorhist = colorhist
    # BUGFIX: `h != None` on a numpy array yields an array (ambiguous truth
    # value); use an identity check. The chunk was cut off mid-statement
    # here; the append matches the identical extraction loop earlier in
    # this source.
    if h is not None:
        query_features.append(h)