# Feature-extraction loop for the query clip: read frames until the end
# timestamp args.e (seconds) is reached and compute, per frame, the single
# feature selected by args.f; results accumulate in query_features.
while cap.isOpened() and cap.get(cv2.CAP_PROP_POS_MSEC) < (int(args.e) * 1000):
    ret, frame = cap.read()
    # cap.read() yields None at end of stream. `frame == None` on a numpy
    # array is an *elementwise* comparison and is wrong inside `if`, so the
    # end-of-stream check must be an identity test.
    if frame is None:
        break
    if args.f == features[0]:
        h = ft.colorhist(frame)
    elif args.f == features[1]:
        # needs prev_frame; yields None/garbage-free only from 2nd frame on
        h = temporal_diff(prev_frame, frame, 10)
    elif args.f == features[2] or args.f == features[3]:
        audio_frame = frame_to_audio(frame_nbr, frame_rate, fs, wav_data)
        if args.f == features[2]:
            # mean signal power of the audio slice aligned with this frame
            h = np.mean(audio_frame**2)
        elif args.f == features[3]:
            h, mspec, spec = ft.extract_mfcc(audio_frame, fs)
    elif args.f == features[4]:
        colorhist = ft.colorhist(frame)
        h = colorhist_diff(prev_colorhist, colorhist)
        prev_colorhist = colorhist
    # h is None on the first frame for difference-based features; for array
    # features `h != None` would again compare elementwise, so test identity.
    if h is not None:
        query_features.append(h)
    prev_frame = frame
    frame_nbr += 1

# Compare with database
video_types = ('*.mp4', '*.MP4', '*.avi')
audio_types = ('*.wav', '*.WAV')
def process_videos(video_list, indx):
    """Extract audio/visual features from every video and store them in `indx`.

    For each video in `video_list` the following per-frame features are
    computed and collected:
      - color histogram (ft.colorhist)
      - temporal difference against the previous frame (temporal_diff)
      - audio power and MFCC coefficients of the audio slice aligned with
        the frame (frame_to_audio / ft.extract_mfcc)
      - difference between consecutive color histograms (colorhist_diff)

    The per-video descriptor dict is added to the index via
    indx.add_to_index(); indx.db_commit() persists everything at the end.
    The audio track is expected as a .wav file next to the video file.
    """
    total = len(video_list)
    progress_count = 0
    for video in video_list:
        progress_count += 1
        # print() with a single pre-formatted string works under both
        # Python 2 and Python 3 (the original py2-only print statement broke py3)
        print('processing: %s (%d of %d)' % (video, progress_count, total))
        cap = cv2.VideoCapture(video)
        frame_rate = get_frame_rate(video)
        total_frames = get_frame_count(video)
        total_audio_frames = get_frame_count_audio(video)
        # corresponding audio file: same basename, .wav extension
        filename, file_extension = os.path.splitext(video)
        audio = filename + '.wav'
        fs, wav_data = read(audio)
        colorhists = []
        sum_of_differences = []
        audio_powers = []
        mfccs = []
        colorhist_diffs = []
        prev_colorhist = None
        prev_frame = None
        frame_nbr = 0
        while cap.isOpened():
            ret, frame = cap.read()
            # `frame == None` is an elementwise numpy comparison; identity
            # is the correct end-of-stream check
            if frame is None:
                break
            audio_frame = frame_to_audio(frame_nbr, frame_rate, fs, wav_data)
            # check if audio frame is long enough for mfcc transformation
            # (10 ms of samples at the wav sample rate)
            if len(audio_frame) >= int(0.01 * fs):
                power = np.mean(audio_frame**2)
                audio_powers.append(power)
                ceps, mspec, spec = ft.extract_mfcc(audio_frame, fs)
                mfccs.append(ceps)
            # temporal difference needs a previous frame, so skip frame 0
            if prev_frame is not None:
                tdiv = temporal_diff(prev_frame, frame, 10)
                sum_of_differences.append(tdiv)
            colorhist = ft.colorhist(frame)
            colorhists.append(colorhist)
            if prev_colorhist is not None:
                ch_diff = colorhist_diff(prev_colorhist, colorhist)
                colorhist_diffs.append(ch_diff)
            prev_colorhist = colorhist
            prev_frame = frame
            frame_nbr += 1
        print('end: %d' % frame_nbr)
        # descriptor layout stored in the database:
        #   descr['mfcc']     Nx13 np array (or however many mfcc coefficients)
        #   descr['audio']    Nx1 np array
        #   descr['colhist']  Nx3x256 np array
        #   descr['tempdiff'] Nx1 np array
        #   descr['chdiff']   (N-1)x1 np array
        descr = {}
        descr['mfcc'] = np.array(mfccs)
        descr['audio'] = np.array(audio_powers)
        descr['colhist'] = np.array(colorhists)
        descr['tempdiff'] = np.array(sum_of_differences)
        descr['chdiff'] = np.array(colorhist_diffs)
        indx.add_to_index(video, descr)
        print('added ' + video + ' to database')
    # single commit after all videos are indexed
    indx.db_commit()
) parser.add_argument("wav_path", help="Path to wavfile") parser.add_argument("-n", help="Number of mfcc components that should be visualized", default=13) args = parser.parse_args() # Read the wavfile fs, data = wavfile.read(args.wav_path) print 'Processing wavfile: ' + args.wav_path + ' ... ' print 'Calculating MFCCs ... ' # extract mfcc coefficents ceps, mspec, spec = ft.extract_mfcc(data, fs) print 'Generating spectogram ... ' # create pyplot figure with custom title fig = plt.figure() title = 'Analysis of ' + args.wav_path fig.canvas.set_window_title(title) # in upper subplot draw the spectogram plt.subplot(2, 1, 1) plt.specgram(data, Fs=fs) plt.title('Spectogram of raw audio data') plt.xlabel('time [s]') plt.ylabel('Frequency [Hz]')
def process_videos(video_list, indx):
    """Compute audio/visual descriptors for each video and index them.

    Per frame the function gathers a color histogram, the temporal
    difference to the previous frame, the aligned audio frame's power and
    MFCC coefficients, and the color-histogram difference to the previous
    frame. Each video's descriptor dict is registered with
    indx.add_to_index() and the whole batch is persisted with
    indx.db_commit(). A sibling .wav file must exist for every video.
    """
    total = len(video_list)
    progress_count = 0
    for video in video_list:
        progress_count += 1
        # print() with one pre-formatted string: valid Python 2 and 3
        # (the original py2-only print statement is a syntax error on py3)
        print('processing: %s (%d of %d)' % (video, progress_count, total))
        cap = cv2.VideoCapture(video)
        frame_rate = get_frame_rate(video)
        total_frames = get_frame_count(video)
        total_audio_frames = get_frame_count_audio(video)
        # matching audio file lives next to the video with a .wav extension
        filename, file_extension = os.path.splitext(video)
        audio = filename + '.wav'
        fs, wav_data = read(audio)
        colorhists = []
        sum_of_differences = []
        audio_powers = []
        mfccs = []
        colorhist_diffs = []
        prev_colorhist = None
        prev_frame = None
        frame_nbr = 0
        while cap.isOpened():
            ret, frame = cap.read()
            # identity test: `frame == None` compares elementwise on arrays
            # and cannot be used as the end-of-stream condition
            if frame is None:
                break
            audio_frame = frame_to_audio(frame_nbr, frame_rate, fs, wav_data)
            # check if audio frame is long enough for mfcc transformation
            if len(audio_frame) >= 256:
                power = np.mean(audio_frame**2)
                audio_powers.append(power)
                ceps, mspec, spec = ft.extract_mfcc(audio_frame, fs)
                mfccs.append(ceps)
            # differences are defined from the second frame onwards
            if prev_frame is not None:
                tdiv = temporal_diff(prev_frame, frame, 10)
                sum_of_differences.append(tdiv)
            colorhist = ft.colorhist(frame)
            colorhists.append(colorhist)
            if prev_colorhist is not None:
                ch_diff = colorhist_diff(prev_colorhist, colorhist)
                colorhist_diffs.append(ch_diff)
            prev_colorhist = colorhist
            prev_frame = frame
            frame_nbr += 1
        print('end: %d' % frame_nbr)
        # descriptor layout stored in the database:
        #   descr['mfcc']     Nx13 np array (or however many mfcc coefficients)
        #   descr['audio']    Nx1 np array
        #   descr['colhist']  Nx3x256 np array
        #   descr['tempdiff'] Nx1 np array
        #   descr['chdiff']   (N-1)x1 np array
        descr = {}
        descr['mfcc'] = np.array(mfccs)
        descr['audio'] = np.array(audio_powers)
        descr['colhist'] = np.array(colorhists)
        descr['tempdiff'] = np.array(sum_of_differences)
        descr['chdiff'] = np.array(colorhist_diffs)
        indx.add_to_index(video, descr)
        print('added ' + video + ' to database')
    # persist the index once, after all videos are processed
    indx.db_commit()
# Seek to the query start time args.s (seconds → ms), then extract the
# feature selected by args.f from every frame until the end time args.e.
cap.set(cv2.CAP_PROP_POS_MSEC, int(args.s) * 1000)
while cap.isOpened() and cap.get(cv2.CAP_PROP_POS_MSEC) < (int(args.e) * 1000):
    ret, frame = cap.read()
    # identity test: `frame == None` on a numpy array is elementwise and
    # cannot serve as the end-of-stream check
    if frame is None:
        break
    if args.f == features[0]:
        h = ft.colorhist(frame)
    elif args.f == features[1]:
        # difference feature — meaningful from the second frame on
        h = temporal_diff(prev_frame, frame, 10)
    elif args.f == features[2] or args.f == features[3]:
        audio_frame = frame_to_audio(frame_nbr, frame_rate, fs, wav_data)
        if args.f == features[2]:
            # mean power of the audio slice aligned with this frame
            h = np.mean(audio_frame**2)
        elif args.f == features[3]:
            h, mspec, spec = ft.extract_mfcc(audio_frame, fs)
    elif args.f == features[4]:
        colorhist = ft.colorhist(frame)
        h = colorhist_diff(prev_colorhist, colorhist)
        prev_colorhist = colorhist
    # difference features produce None on the first frame; `h != None`
    # would again compare elementwise for array-valued features
    if h is not None:
        query_features.append(h)
    prev_frame = frame
    frame_nbr += 1

# Compare with database
video_types = ('*.mp4', '*.MP4', '*.avi')