Ejemplo n.º 1
0
    def runTest(self):
        print "------------------ Test3  Populate from MP coeffs ---------"
        fileIndex = 2
        RandomAudioFilePath = file_names[fileIndex]
        print 'Working on %s' % RandomAudioFilePath
        sizes = [2**j for j in range(7, 15)]
        segDuration = 5
        nbAtom = 20

        pySig = signals.Signal(op.join(audio_files_path, RandomAudioFilePath),
                               mono=True,
                               normalize=True)

        segmentLength = ((segDuration * pySig.fs) / sizes[-1]) * sizes[-1]
        nbSeg = floor(pySig.length / segmentLength)
        # cropping
        pySig.crop(0, segmentLength)

        # create dictionary
        pyDico = Dico(sizes)

        approx, decay = mp.mp(pySig, pyDico, 20, nbAtom, pad=True, debug=0)

        ppdb = XMDCTBDB('MPdb.db', load=False)
        #        ppdb.keyformat = None
        ppdb.populate(approx, None, fileIndex)

        nKeys = ppdb.get_stats()['ndata']
        # compare the number of keys in the base to the number of atoms

        print ppdb.get_stats()
        self.assertEqual(nKeys, approx.atom_number)

        # now try to recover the fileIndex knowing one of the atoms
        Key = [
            log(approx.atoms[0].length, 2),
            approx.atoms[0].reduced_frequency * pySig.fs
        ]
        T, fileI = ppdb.get(Key)
        Treal = (float(approx.atoms[0].time_position) / float(pySig.fs))
        print T, Treal
        self.assertEqual(fileI[0], fileIndex)
        Tpy = np.array(T)
        self.assertTrue((np.abs(Tpy - Treal)).min() < 0.1)

        # last check: what does a request for non-existing atom in base return?
        T, fileI = ppdb.get((11, 120.0))
        self.assertEqual(T, [])
        self.assertEqual(fileI, [])

        # now let's just retrieve the atoms from the base and see if they are
        # the same
        histograms = ppdb.retrieve(approx, None, offset=0)
        #        plt.figure()
        #        plt.imshow(histograms[0:10,:])
        #        plt.show()
        del ppdb
Ejemplo n.º 2
0
    def runTest(self):
        print "------------------ Test2  DB Handle ---------"
        print 'Creating a hash table using Berkeley DB'
        dbName = 'dummy.db'

        ppbdb = XMDCTBDB(dbName, load=False, persistent=False)
        ppbdb.keyformat = None
        print 'populating with a key from MP'
        F = 128
        T = 125.0
        FileIdx = 104

        ppbdb.add(zip((F, ), (T, )), FileIdx)
        estT, estFileI = ppbdb.get(F)

        # refactoring, renvoi une liste de candidats

        self.assertEqual(estFileI[0], FileIdx)
        self.assertTrue(abs(estT[0] - T) < 0.01)

        print ppbdb.get_stats()
        print ppbdb
        del ppbdb
Ejemplo n.º 3
0
    def runTest(self):
        ''' what happens when the same key is used with a different value? we should be able to retrieve
        a list of possible songs
        '''
        print "------------------ Test 7  handling uniqueness ---------"
        dummyDb = XMDCTBDB('dummyDb.db', load=False, persistent=False)
        dummyDb.keyformat = None
        dummyDb.add(zip((440, 128, 440), (12.0, 16.0, 45.0)), 1)
        dummyDb.add(zip((512, 320, 440), (54.0, 16.0, 11.0)), 2)

        #        print dummyDb.get_stats()
        self.assertEqual(6, dummyDb.get_stats()['ndata'])

        T, fileIndex = dummyDb.get(440)
        print zip(T, fileIndex)
Ejemplo n.º 4
0
    def runTest(self):
        ''' take the base previously constructed and retrieve the song index based on 10 atoms
        '''
        print "------------------ Test6  recognition ---------"

        nbCandidates = 8
        ppdb = XMDCTBDB('LargeMPdb.db', load=True)

        print 'Large Db of ' + str(ppdb.get_stats()['nkeys']) + ' and ' + str(
            ppdb.get_stats()['ndata'])
        # Now take a song, decompose it and try to retrieve it
        fileIndex = 6
        RandomAudioFilePath = file_names[fileIndex]
        print 'Working on ' + str(RandomAudioFilePath)
        pySig = signals.Signal(op.join(audio_files_path, RandomAudioFilePath),
                               mono=True)

        pyDico = LODico(sizes)
        segDuration = 5
        offsetDuration = 7
        offset = offsetDuration * pySig.fs
        nbAtom = 50
        segmentLength = ((segDuration * pySig.fs) / sizes[-1]) * sizes[-1]
        pySig.crop(offset, offset + segmentLength)

        approx, decay = mp.mp(pySig, pyDico, 40, nbAtom, pad=True)

        #        plt.figure()
        #        approx.plotTF()
        #        plt.show()
        res = map(ppdb.get, map(ppdb.kform, approx.atoms),
                  [(a.time_position - pyDico.get_pad()) / approx.fs
                   for a in approx.atoms])
        #
        #res = map(bdb.get, map(bdb.kform, approx.atoms))

        histogram = np.zeros((600, nbCandidates))

        for i in range(approx.atom_number):
            print res[i]
            histogram[res[i]] += 1

        max1 = np.argmax(histogram[:])
        Offset1 = max1 / nbCandidates
        estFile1 = max1 % nbCandidates
        #        candidates , offsets = ppdb.retrieve(approx);
        #        print approx.atom_number
        histograms = ppdb.retrieve(approx, None, offset=0, nbCandidates=8)
        # print histograms , np.max(histograms) , np.argmax(histograms, axis=0) ,
        # np.argmax(histograms, axis=1)

        #        plt.figure()
        #        plt.imshow(histograms[0:20,:],interpolation='nearest')
        #        plt.show()

        maxI = np.argmax(histograms[:])
        OffsetI = maxI / nbCandidates
        estFileI = maxI % nbCandidates

        print fileIndex, offsetDuration, estFileI, OffsetI, estFile1, Offset1, max1, maxI
        import matplotlib.pyplot as plt
        #        plt.figure(figsize=(12,6))
        #        plt.subplot(121)
        #        plt.imshow(histograms,aspect='auto',interpolation='nearest')
        #        plt.subplot(122)
        #        plt.imshow(histogram,aspect='auto',interpolation='nearest')
        ##        plt.imshow(histograms,aspect='auto',interpolation='nearest')
        ##        plt.colorbar()
        #        plt.show()

        print maxI, OffsetI, estFileI
        self.assertEqual(histograms[OffsetI, estFileI], np.max(histograms))

        self.assertEqual(fileIndex, estFileI)
        self.assertTrue(abs(offsetDuration - OffsetI) <= 2.5)
Ejemplo n.º 5
0
    def runTest(self):
        ''' time to test the fingerprinting scheme, create a base with 10 atoms for 8 songs, then
            Construct the histograms and retrieve the fileIndex and time offset that is the most
            plausible '''
        print "------------------ Test5  DB construction ---------"
        #        # create the base : persistent
        ppdb = XMDCTBDB('LargeMPdb.db', load=False, time_res=0.2)
        print ppdb
        padZ = 2 * sizes[-1]
        # BUGFIX: pour le cas MP classique: certains atome reviennent : pas
        # cool car paire key/data existe deja!
        pyDico = LODico(sizes)
        segDuration = 5
        nbAtom = 50
        sig = signals.LongSignal(op.join(audio_files_path, file_names[0]),
                                 frame_size=sizes[-1],
                                 mono=False,
                                 Noverlap=0)

        segmentLength = ((segDuration * sig.fs) / sizes[-1]) * sizes[-1]
        max_seg_num = 5
        #        " run MP on a number of files"
        nbFiles = 8
        keycount = 0
        for fileIndex in range(nbFiles):
            RandomAudioFilePath = file_names[fileIndex]
            print fileIndex, RandomAudioFilePath
            if not (RandomAudioFilePath[-3:] == 'wav'):
                continue

            pySig = signals.LongSignal(op.join(audio_files_path,
                                               RandomAudioFilePath),
                                       frame_size=segmentLength,
                                       mono=False,
                                       Noverlap=0)

            nbSeg = int(pySig.n_seg)
            print 'Working on ' + str(RandomAudioFilePath) + ' with ' + str(
                nbSeg) + ' segments'
            for segIdx in range(min(nbSeg, max_seg_num)):
                pySigLocal = pySig.get_sub_signal(segIdx,
                                                  1,
                                                  True,
                                                  True,
                                                  channel=0,
                                                  pad=padZ)
                print "MP on segment %d" % segIdx
                # run the decomposition
                approx, decay = mp.mp(pySigLocal,
                                      pyDico,
                                      40,
                                      nbAtom,
                                      pad=False)

                print "Populating database with offset " + str(
                    segIdx * segmentLength / sig.fs)
                ppdb.populate(approx,
                              None,
                              fileIndex,
                              offset=float((segIdx * segmentLength) - padZ) /
                              sig.fs)

                keycount += approx.atom_number

        print ppdb.get_stats()