def runTest(self):
    '''Time to test the fingerprinting scheme: create a base with 200 atoms
    for 8 songs, then construct the histograms and retrieve the fileIndex and
    time offset that is the most plausible.

    Side effects: creates (without loading) the persistent key/value store
    'LargeSTFTPeaksdb.db' and populates it with STFT-peak fingerprints.
    '''
    print "------------------ Test5 DB construction ---------"
    #
    # create the base : persistent
    ppdb = STFTPeaksBDB('LargeSTFTPeaksdb.db', load=False, time_res=0.2)
    print ppdb
    # segment duration in seconds; the first file fixes the segment size
    # (in samples) used to re-read every other file below
    segDuration = 5
    sig = LongSignal(op.join(audio_files_path, file_names[0]),
                     frame_duration=segDuration, mono=False, Noverlap=0)
    segmentLength = sig.segment_size
    # cap on the number of segments processed per file
    max_seg_num = 5
    # " run sketchifier on a number of files"
    nbFiles = 8
    keycount = 0  # NOTE(review): never updated — the increment below is commented out
    for fileIndex in range(nbFiles):
        RandomAudioFilePath = file_names[fileIndex]
        print fileIndex, RandomAudioFilePath
        # skip anything that is not a wav file
        if not (RandomAudioFilePath[-3:] == 'wav'):
            continue
        pySig = LongSignal(op.join(audio_files_path, RandomAudioFilePath),
                           frame_size=segmentLength, mono=False, Noverlap=0)
        # STFT-peaks sketch: 512-sample analysis window, 128-sample hop
        sk = sketch.STFTPeaksSketch(**{'scale': 512, 'step': 128})
        nbSeg = int(pySig.n_seg)
        print 'Working on ' + str(RandomAudioFilePath) + ' with ' + str(
            nbSeg) + ' segments'
        for segIdx in range(min(nbSeg, max_seg_num)):
            pySigLocal = pySig.get_sub_signal(
                segIdx, 1, True, True, channel=0, pad=0)
            print "sketchify the segment %d" % segIdx
            # run the decomposition
            sk.recompute(pySigLocal)
            # keep the 200 most prominent atoms as the fingerprint
            sk.sparsify(200)
            fgpt = sk.fgpt()
            print "Populating database with offset " + str(
                segIdx * segmentLength / sig.fs)
            # NOTE(review): the printed offset is NOT passed to populate()
            # here, unlike the companion test which forwards
            # offset=segIdx * segment_size / fs — confirm this is intended.
            ppdb.populate(fgpt, sk.params, fileIndex)
            # keycount += approx.atom_number
    print ppdb.get_stats()
fgpt_scripts.visu_dataset_stats - Created on Jul 1, 2013 @author: M. Moussallam ''' import os import os.path as op from classes import pydb, sketch from tools.fgpt_tools import db_creation, db_test from PyMP.signals import LongSignal import numpy as np # The RWC subset path audio_path = '/sons/rwc/Learn' db_path = '/home/manu/workspace/audio-sketch/fgpt_db' file_names = [f for f in os.listdir(audio_path) if '.wav' in f] nb_files = len(file_names) nb_segs = 0 # dataset length in seconds dur = [] for fileIndex in range(nb_files): l_sig = LongSignal(op.join(audio_path, file_names[fileIndex]), frame_duration=5.0, mono=True, Noverlap=0) dur.append(l_sig.n_seg * 5.0) nb_segs += l_sig.n_seg tot = np.sum(dur) hours = int(np.floor(tot / 3600)) minutes = int(np.floor((tot - (hours* 3600))/60)) print "Dataset last %d hours and %d minutes or %d segments of 5 secs"%(hours, minutes, nb_segs)
def runTest(self):
    '''Populate a temporary fingerprint DB with segments of one file, then
    query it with half-overlapping segments of the same file and check that
    the estimated time offset matches the oracle in at least 80% of cases.
    '''
    ppdb = STFTPeaksBDB('tempdb.db', load=False, persistent=True,
                        time_max=500.0)
    pySig = LongSignal(op.join(audio_files_path, file_names[0]),
                       frame_duration=5, mono=False, Noverlap=0)
    # sanity check: a 5-second frame is 5*fs samples long
    self.assertEqual(pySig.segment_size, 5.0 * pySig.fs)
    max_nb_seg = 10
    nb_atoms = 400
    # STFT-peaks sketch: 512-sample analysis window, 128-sample hop
    sk = sketch.STFTPeaksSketch(**{'scale': 512, 'step': 128})
    # --- phase 1: populate the DB with non-overlapping segments (file id 0)
    for segIdx in range(min(max_nb_seg, pySig.n_seg)):
        pySigLocal = pySig.get_sub_signal(
            segIdx, 1, mono=True, normalize=False, channel=0, pad=0)
        print "sketchify segment %d" % segIdx
        # run the decomposition
        sk.recompute(pySigLocal)
        sk.sparsify(nb_atoms)
        fgpt = sk.fgpt()
        print "Populating database with offset " + str(
            segIdx * pySig.segment_size / pySig.fs)
        ppdb.populate(fgpt, sk.params, 0,
                      offset=segIdx * pySig.segment_size / pySig.fs)
    # ok we have a DB with only 1 file and different segments, now
    # --- phase 2: query with half-overlapping segments of the same file
    nb_test_seg = 15
    long_sig_test = LongSignal(op.join(audio_files_path, file_names[0]),
                               frame_duration=5, mono=False, Noverlap=0.5)
    count = 0
    for segIdx in range(min(nb_test_seg, long_sig_test.n_seg)):
        pySigLocal = long_sig_test.get_sub_signal(
            segIdx, 1, mono=True, normalize=False, channel=0, pad=0)
        # print "MP on segment %d" % segIdx
        # run the decomposition
        sk.recompute(pySigLocal)
        sk.sparsify(nb_atoms)
        fgpt = sk.fgpt()
        histograms = ppdb.retrieve(fgpt, sk.params, offset=0, nbCandidates=1)
        maxI = np.argmax(histograms[:])
        # NOTE(review): with nbCandidates=1 these reduce to OffsetI = maxI
        # and estFileI = 0; the /1 and %1 appear to be the generic
        # (offset, file) unraveling specialised to a single candidate —
        # estFileI is never used afterwards.
        OffsetI = maxI / 1
        estFileI = maxI % 1
        # expected offset in seconds, accounting for the 50% overlap
        oracle_value = segIdx * long_sig_test.segment_size * (
            1 - long_sig_test.overlap) / long_sig_test.fs
        print "Seg %d Oracle: %1.1f - found %1.1f" % (segIdx, oracle_value,
                                                      OffsetI)
        # count a hit when the estimate is within 5 seconds of the oracle
        if abs(OffsetI - oracle_value) < 5:
            count += 1
    glob = float(count) / float(min(nb_test_seg, long_sig_test.n_seg))
    print "Global Score of %1.3f" % glob
    self.assertGreater(glob, 0.8)
# Measure the distortion of joint coding using approx of first patter as the reference max_rate = 1000 # maximum bitrate allowed (in bits) search_width = 1024 # maximum time shift allowed in samples info_dist = joint_coding_distortion(sig_occ2, app_1, max_rate, search_width) info_dist_rev = joint_coding_distortion(sig_occ1, app_2, max_rate, search_width) print "%1.5f - %1.5f"%(info_dist/target_srr, info_dist_rev/target_srr) # building the similarity matrix # Now load the long version from PyMP.signals import LongSignal seg_size = 5*8192 long_signal = LongSignal(op.join(os.environ['PYMP_PATH'],'data/Bach_prelude_40s.wav'), seg_size, mono=True, Noverlap=0.5) # decomposing the long signal apps, decays = mp.mp_long(long_signal, dico, target_srr, max_atom_num) dists = np.zeros((long_signal.n_seg, len(apps))) mp._initialize_fftw(apps[0].dico, max_thread_num=1) for idx in range(long_signal.n_seg): for jdx in range(idx): # test all preceeding segments only
import os.path as op import matplotlib.pyplot as plt import numpy as np os.environ['PYMP_PATH'] = '/home/manu/workspace/PyMP/' from PyMP.mdct import Dico, LODico from PyMP import mp, Signal from PyMP.signals import LongSignal from PyMP.mp_coder import joint_coding_distortion dico = Dico([128,1024,8192]) target_srr = 5 max_atom_num = 200 max_rate = 1000 seg_size = 5*8192 long_signal = LongSignal(op.join(os.environ['PYMP_PATH'],'data/Bach_prelude_40s.wav'), seg_size, mono=True, Noverlap=0.5) # limit to the first 64 segments long_signal.n_seg = 32 # decomposing the long signal apps, decays = mp.mp_long(long_signal, dico, target_srr, max_atom_num) mp._initialize_fftw(apps[0].dico, max_thread_num=1) dists = np.zeros((long_signal.n_seg, len(apps))) for idx in range(long_signal.n_seg):