Esempio n. 1
0
# Walk the capture frame by frame until the end timestamp (args.e, seconds)
# is reached, extracting the feature selected on the command line (args.f).
while (cap.isOpened() and cap.get(cv2.CAP_PROP_POS_MSEC) <
       (int(args.e) * 1000)):
    ret, frame = cap.read()
    # Use 'is None': comparing a numpy array with '== None' performs an
    # element-wise comparison and raises/returns an array in modern numpy.
    if frame is None:
        break

    # Reset per frame so a stale value from the previous iteration is never
    # appended (and an unmatched args.f does not raise NameError below).
    h = None
    if args.f == features[0]:
        # per-frame color histogram
        h = ft.colorhist(frame)
    elif args.f == features[1]:
        # temporal difference against the previous frame
        h = temporal_diff(prev_frame, frame, 10)
    elif args.f == features[2] or args.f == features[3]:
        # audio features need the samples aligned with this video frame
        audio_frame = frame_to_audio(frame_nbr, frame_rate, fs, wav_data)
        if args.f == features[2]:
            # mean signal power of the audio frame
            h = np.mean(audio_frame**2)
        elif args.f == features[3]:
            # MFCC coefficients (mspec/spec are discarded)
            h, mspec, spec = ft.extract_mfcc(audio_frame, fs)
    elif args.f == features[4]:
        # difference between consecutive color histograms
        colorhist = ft.colorhist(frame)
        h = colorhist_diff(prev_colorhist, colorhist)
        prev_colorhist = colorhist

    # Difference-based features return None on the very first frame.
    if h is not None:
        query_features.append(h)
    prev_frame = frame
    frame_nbr += 1

# Compare with database

# Glob patterns for the supported video and audio container formats
# (both lower- and upper-case extensions, for case-sensitive filesystems).
video_types = ('*.mp4', '*.MP4', '*.avi')
audio_types = ('*.wav', '*.WAV')
Esempio n. 2
0
def process_videos(video_list, indx):
    total = len(video_list)
    progress_count = 0
    for video in video_list:
        progress_count += 1
        print 'processing: ', video, ' (', progress_count, ' of ', total, ')'
        cap = cv2.VideoCapture(video)
        frame_rate = get_frame_rate(video)
        total_frames = get_frame_count(video)
        total_audio_frames = get_frame_count_audio(video)

        # get corresponding audio file
        filename, fileExtension = os.path.splitext(video)
        audio = filename + '.wav'
        fs, wav_data = read(audio)

        colorhists = []
        sum_of_differences = []
        audio_powers = []
        mfccs = []
        colorhist_diffs = []

        prev_colorhist = None
        prev_frame = None
        frame_nbr = 0
        while (cap.isOpened()):
            ret, frame = cap.read()
            if frame == None:
                break
            audio_frame = frame_to_audio(frame_nbr, frame_rate, fs, wav_data)

            # check if audio frame is long enough for mfcc transformation
            if len(audio_frame) >= int(0.01 * fs):
                power = np.mean(audio_frame**2)
                audio_powers.append(power)
                ceps, mspec, spec = ft.extract_mfcc(audio_frame, fs)
                mfccs.append(ceps)

            # calculate sum of differences
            if not prev_frame == None:
                tdiv = temporal_diff(prev_frame, frame, 10)
                #diff = np.absolute(prev_frame - frame)
                #sum = np.sum(diff.flatten()) / (diff.shape[0]*diff.shape[1]*diff.shape[2])
                sum_of_differences.append(tdiv)
            colorhist = ft.colorhist(frame)
            colorhists.append(colorhist)
            if not prev_colorhist == None:
                ch_diff = colorhist_diff(prev_colorhist, colorhist)
                colorhist_diffs.append(ch_diff)
            prev_colorhist = colorhist
            prev_frame = frame
            frame_nbr += 1
        print 'end:', frame_nbr

        # prepare descriptor for database
        # mfccs = descr['mfcc'] # Nx13 np array (or however many mfcc coefficients there are)
        # audio = descr['audio'] # Nx1 np array
        # colhist = descr['colhist'] # Nx3x256 np array
        # tempdif = descr['tempdiff'] # Nx1 np array
        descr = {}
        descr['mfcc'] = np.array(mfccs)
        descr['audio'] = np.array(audio_powers)
        descr['colhist'] = np.array(colorhists)
        descr['tempdiff'] = np.array(sum_of_differences)
        descr['chdiff'] = np.array(colorhist_diffs)
        indx.add_to_index(video, descr)
        print 'added ' + video + ' to database'
    indx.db_commit()
Esempio n. 3
0
)
parser.add_argument("wav_path", help="Path to wavfile")
parser.add_argument("-n",
                    help="Number of mfcc components that should be visualized",
                    default=13)

args = parser.parse_args()

# Read the wavfile (fs: sample rate in Hz, data: raw sample array)
fs, data = wavfile.read(args.wav_path)

print 'Processing wavfile: ' + args.wav_path + ' ... '

print 'Calculating MFCCs ... '
# extract mfcc coefficents (cepstra, mel spectrum, raw spectrum)
ceps, mspec, spec = ft.extract_mfcc(data, fs)

print 'Generating spectogram ... '

# create pyplot figure with custom title
fig = plt.figure()
title = 'Analysis of ' + args.wav_path
# NOTE(review): canvas.set_window_title is deprecated in newer matplotlib
# (moved to fig.canvas.manager.set_window_title) — confirm target version.
fig.canvas.set_window_title(title)

# in upper subplot draw the spectogram of the raw audio signal
plt.subplot(2, 1, 1)
plt.specgram(data, Fs=fs)
plt.title('Spectogram of raw audio data')
plt.xlabel('time [s]')
plt.ylabel('Frequency [Hz]')
Esempio n. 4
0
def process_videos(video_list, indx):
    total = len(video_list)
    progress_count = 0
    for video in video_list:
        progress_count += 1
        print 'processing: ',video, ' (' ,progress_count, ' of ' ,total,')'
        cap = cv2.VideoCapture(video)
        frame_rate = get_frame_rate(video) 
        total_frames = get_frame_count(video)
        total_audio_frames = get_frame_count_audio(video)

        # get corresponding audio file
        filename, fileExtension = os.path.splitext(video)
        audio = filename + '.wav'
        fs, wav_data = read(audio)

        colorhists = []
        sum_of_differences = []
        audio_powers = []
        mfccs = []
        colorhist_diffs = []

        prev_colorhist = None
        prev_frame = None
        frame_nbr = 0
        while(cap.isOpened()):
            ret, frame = cap.read()
            if frame == None:
                break
            audio_frame = frame_to_audio(frame_nbr, frame_rate, fs, wav_data)

            # check if audio frame is long enough for mfcc transformation
            if len(audio_frame) >= 256:
                power = np.mean(audio_frame**2)
                audio_powers.append(power)
                ceps, mspec, spec = ft.extract_mfcc(audio_frame, fs)
                mfccs.append(ceps)
                
            # calculate sum of differences
            if not prev_frame == None:
                tdiv = temporal_diff(prev_frame, frame, 10)
                #diff = np.absolute(prev_frame - frame)
                #sum = np.sum(diff.flatten()) / (diff.shape[0]*diff.shape[1]*diff.shape[2])
                sum_of_differences.append(tdiv)
            colorhist = ft.colorhist(frame)
            colorhists.append(colorhist)
            if not prev_colorhist == None:
                ch_diff = colorhist_diff(prev_colorhist, colorhist)
                colorhist_diffs.append(ch_diff)
            prev_colorhist = colorhist
            prev_frame = frame
            frame_nbr += 1
        print 'end:', frame_nbr
        
        # prepare descriptor for database
        # mfccs = descr['mfcc'] # Nx13 np array (or however many mfcc coefficients there are)
        # audio = descr['audio'] # Nx1 np array
        # colhist = descr['colhist'] # Nx3x256 np array
        # tempdif = descr['tempdiff'] # Nx1 np array
        descr = {}
        descr['mfcc'] = np.array(mfccs)
        descr['audio'] = np.array(audio_powers)
        descr['colhist'] = np.array(colorhists)
        descr['tempdiff'] = np.array(sum_of_differences)
        descr['chdiff'] = np.array(colorhist_diffs)
        indx.add_to_index(video,descr)
        print 'added ' + video + ' to database'
    indx.db_commit()
Esempio n. 5
0
# Seek to the start timestamp (args.s, seconds) before extracting features.
cap.set(cv2.CAP_PROP_POS_MSEC, int(args.s)*1000)
# Walk the capture frame by frame until the end timestamp (args.e, seconds)
# is reached, extracting the feature selected on the command line (args.f).
while(cap.isOpened() and cap.get(cv2.CAP_PROP_POS_MSEC) < (int(args.e)*1000)):
    ret, frame = cap.read()
    # Use 'is None': comparing a numpy array with '== None' performs an
    # element-wise comparison and raises/returns an array in modern numpy.
    if frame is None:
        break

    # Reset per frame so a stale value from the previous iteration is never
    # appended (and an unmatched args.f does not raise NameError below).
    h = None
    if args.f == features[0]:
        # per-frame color histogram
        h = ft.colorhist(frame)
    elif args.f == features[1]:
        # temporal difference against the previous frame
        h = temporal_diff(prev_frame, frame, 10)
    elif args.f == features[2] or args.f == features[3]:
        # audio features need the samples aligned with this video frame
        audio_frame = frame_to_audio(frame_nbr, frame_rate, fs, wav_data)
        if args.f == features[2]:
            # mean signal power of the audio frame
            h = np.mean(audio_frame**2)
        elif args.f == features[3]:
            # MFCC coefficients (mspec/spec are discarded)
            h, mspec, spec = ft.extract_mfcc(audio_frame, fs)
    elif args.f == features[4]:
        # difference between consecutive color histograms
        colorhist = ft.colorhist(frame)
        h = colorhist_diff(prev_colorhist, colorhist)
        prev_colorhist = colorhist

    # Difference-based features return None on the very first frame.
    if h is not None:
        query_features.append(h)
    prev_frame = frame
    frame_nbr += 1


# Compare with database

video_types = ('*.mp4', '*.MP4', '*.avi')