def testMerge(self):
    """Merging two complementary selections must equal selecting both at once."""
    ds = testdata.loadTestDB()
    ds1 = transform(ds, 'select', {'descriptorNames': '*.mean'})
    ds2 = transform(ds, 'select', {'descriptorNames': '*.var'})
    ds12 = transform(ds, 'select', {'descriptorNames': ['*.mean', '*.var']})
    ds_merged = mergeDataSets(ds1, ds2)

    # a Point can only be added with the original layout, not the
    # FixedLength one, hence the settings reset and the fresh reload
    testdata.resetSettings()
    ds_orig = testdata.loadTestDB()
    sp = ds_orig.samplePoint()

    # adding points should still work normally on the merged dataset
    ds_merged.removePoint(sp.name())
    ds_merged.addPoint(sp)

    # the merged contents must match the direct double selection
    self.compareDataSets(ds12, ds_merged)

    # the Merge applier's mapDataSet must reproduce the same result
    ds_remapped = ds_merged.history().mapDataSet(ds_orig)
    self.compareDataSets(ds12, ds_remapped)
    self.compareDataSets(ds_merged, ds_remapped)
def testEnumerateKey(self):
    """Enumerated string descriptors must still map back to the same labels."""
    db = testdata.loadTestDB()
    testdata.useEnumerate = True
    dbe = testdata.loadTestDB()

    # also make sure we can map single points correctly; the point is loaded
    # from disk rather than taken from the dataset so it gets its own enum map
    p = Point()
    p.load('data/dataset_small/Vocal and Acapella/04 Blue Skies.mp3.sig')
    print(p.name())

    # also create a transfo that forwards enums after the enumerate transfo
    dbes = transform(dbe, 'select', {'descriptorNames': '*key*'})
    pe = dbes.history().mapPoint(p)

    self.assertEqual(p['key_mode'], pe['key_mode'])
    self.assertEqual(p['key_key'], pe['key_key'])

    self.assertNotEqual(db.layout(), dbe.layout())

    for p in db.points():
        pe = dbe.point(p.name())
        self.assertEqual(p.label('key_key'), pe.label('key_key'))
        self.assertEqual(p.label('key_mode'), pe.label('key_mode'))
def testEnumerateKey(self):
    """Enumerated string descriptors must still map back to the same labels.

    Fix: the original used the Python 2 statement form ``print p.name()``,
    which is a SyntaxError on Python 3 (the sibling variant of this test
    already uses the function form).
    """
    db = testdata.loadTestDB()
    testdata.useEnumerate = True
    dbe = testdata.loadTestDB()

    # also make sure we can map single points correctly; we need to load it
    # separately and not take it from the dataset to ensure that it'll have
    # a different enum map
    p = Point()
    p.load("data/dataset_small/Vocal and Acapella/04 Blue Skies.mp3.sig")
    print(p.name())

    # also create a transfo that forwards enums after we did the enumerate transfo
    dbes = transform(dbe, "select", {"descriptorNames": "*key*"})
    pe = dbes.history().mapPoint(p)

    self.assertEqual(p["key_mode"], pe["key_mode"])
    self.assertEqual(p["key_key"], pe["key_key"])

    self.assertNotEqual(db.layout(), dbe.layout())

    for p in db.points():
        pe = dbe.point(p.name())
        self.assertEqual(p.label("key_key"), pe.label("key_key"))
        self.assertEqual(p.label("key_mode"), pe.label("key_mode"))
def testMerge(self):
    """Merging two complementary selections must equal selecting both at once."""
    ds = testdata.loadTestDB()
    ds1 = transform(ds, "select", {"descriptorNames": "*.mean"})
    ds2 = transform(ds, "select", {"descriptorNames": "*.var"})
    ds12 = transform(ds, "select", {"descriptorNames": ["*.mean", "*.var"]})
    merged = mergeDataSets(ds1, ds2)

    # a Point can only be added with the original layout, not the
    # FixedLength one, hence the settings reset and the fresh reload
    testdata.resetSettings()
    ds_orig = testdata.loadTestDB()
    sample = ds_orig.samplePoint()

    # removing and re-adding a point should work normally
    merged.removePoint(sample.name())
    merged.addPoint(sample)

    # the merged contents must match the direct double selection
    self.compareDataSets(ds12, merged)

    # the Merge applier's mapDataSet must reproduce the same result
    remapped = merged.history().mapDataSet(ds_orig)
    self.compareDataSets(ds12, remapped)
    self.compareDataSets(merged, remapped)
def testHistory(self):
    """A transformation history must replay identically on points and survive
    a save/load round-trip.

    Fixes: ``self.assert_`` is a deprecated alias removed in Python 3.12
    (replaced by ``assertTrue``); the temporary file leaked whenever an
    assertion failed (now removed in a ``finally`` clause).
    """
    ds = testdata.loadTestDB()
    ignored_descs = testdata.TEST_DATABASE_VARLENGTH_REAL

    testdata.resetSettings()
    ds_orig = testdata.loadTestDB()

    # cleaning, mandatory step
    ds = transform(ds, 'fixlength', {'except': ignored_descs})
    cleaned_db = transform(ds, 'cleaner', {'except': ignored_descs})

    # removing annoying descriptors, like mfcc.cov & mfcc.icov, which don't
    # like to be normalized like the other ones (constant value: dimension)
    no_mfcc_db = transform(cleaned_db, 'remove', {'descriptorNames': '*mfcc*'})

    # normalize, to have everyone change values
    normalized_db = transform(no_mfcc_db, 'normalize', {'except': ignored_descs})

    testPoints = ['01 Oye Como Va - Santana.mp3',
                  '02 Carmen Burana- O Fortuna.mp3',
                  '07 Romeo and Juliet- the Knights\' Dance.mp3',
                  '11 Lambada.mp3']

    for pointName in testPoints:
        p1 = normalized_db.point(pointName)
        p2 = normalized_db.history().mapPoint(ds_orig.point(pointName))
        for name in p1.layout().descriptorNames():
            self.assertEqual(p1[name], p2[name])

    (tmpFile, tmpName) = tempfile.mkstemp()
    os.close(tmpFile)
    try:
        normalized_db.save(tmpName)
        reloaded_db = DataSet()
        reloaded_db.load(tmpName)

        for pointName in testPoints:
            p1 = normalized_db.point(pointName)
            p2 = normalized_db.history().mapPoint(ds_orig.point(pointName))
            p3 = reloaded_db.point(pointName)
            p4 = reloaded_db.history().mapPoint(ds_orig.point(pointName))

            self.assertTrue(p1.layout() == p2.layout())
            self.assertTrue(p2.layout() == p3.layout())
            self.assertTrue(p3.layout() == p4.layout())

            for name in p1.layout().descriptorNames():
                self.assertEqual(p1[name], p2[name])
                self.assertEqual(p2[name], p3[name])
                self.assertEqual(p3[name], p4[name])
    finally:
        # remove temp file even if an assertion above fails
        os.remove(tmpName)
def testHistory(self):
    """A transformation history must replay identically on points and survive
    a save/load round-trip.

    Fixes: ``self.assert_`` is a deprecated alias removed in Python 3.12
    (replaced by ``assertTrue``); the temporary file leaked whenever an
    assertion failed (now removed in a ``finally`` clause).
    """
    ds = testdata.loadTestDB()
    ignored_descs = testdata.TEST_DATABASE_VARLENGTH_REAL

    testdata.resetSettings()
    ds_orig = testdata.loadTestDB()

    # cleaning, mandatory step
    ds = transform(ds, 'fixlength', {'except': ignored_descs})
    cleaned_db = transform(ds, 'cleaner', {'except': ignored_descs})

    # removing annoying descriptors, like mfcc.cov & mfcc.icov, which don't
    # like to be normalized like the other ones (constant value: dimension)
    no_mfcc_db = transform(cleaned_db, 'remove', {'descriptorNames': '*mfcc*'})

    # normalize, to have everyone change values
    normalized_db = transform(no_mfcc_db, 'normalize', {'except': ignored_descs})

    testPoints = ['01 Oye Como Va - Santana.mp3',
                  '02 Carmen Burana- O Fortuna.mp3',
                  '07 Romeo and Juliet- the Knights\' Dance.mp3',
                  '11 Lambada.mp3']

    for pointName in testPoints:
        p1 = normalized_db.point(pointName)
        p2 = normalized_db.history().mapPoint(ds_orig.point(pointName))
        for name in p1.layout().descriptorNames():
            self.assertEqual(p1[name], p2[name])

    (tmpFile, tmpName) = tempfile.mkstemp()
    os.close(tmpFile)
    try:
        normalized_db.save(tmpName)
        reloaded_db = DataSet()
        reloaded_db.load(tmpName)

        for pointName in testPoints:
            p1 = normalized_db.point(pointName)
            p2 = normalized_db.history().mapPoint(ds_orig.point(pointName))
            p3 = reloaded_db.point(pointName)
            p4 = reloaded_db.history().mapPoint(ds_orig.point(pointName))

            self.assertTrue(p1.layout() == p2.layout())
            self.assertTrue(p2.layout() == p3.layout())
            self.assertTrue(p3.layout() == p4.layout())

            for name in p1.layout().descriptorNames():
                self.assertEqual(p1[name], p2[name])
                self.assertEqual(p2[name], p3[name])
                self.assertEqual(p3[name], p4[name])
    finally:
        # remove temp file even if an assertion above fails
        os.remove(tmpName)
def testAddField(self):
    """addfield must reject existing descriptors and honour defaults otherwise."""
    ds = testdata.loadTestDB()
    dance = ds.samplePoint().value("danceability")

    # adding a field that already exists must raise
    af = AnalyzerFactory.create(
        "addfield",
        {"string": ["metadata.artist", "metadata.album"],
         "real": "danceability"})
    self.assertRaises(Exception, af.analyze, ds)

    # normal operation: new fields with and without explicit defaults
    dsm = transform(
        ds,
        "addfield",
        {"string": ["metadata.artist", "metadata.album"],
         "real": ["metadata.year", "metadata.track", "metadata.ratings"],
         "default": {"metadata.album": "unknown",
                     "metadata.year": 2012,
                     "metadata.ratings": [12, 23, 34]}})

    p = dsm.samplePoint()
    self.assertEqual(p.value("metadata.year"), 2012)
    self.assertEqual(p.value("metadata.ratings"), (12, 23, 34))
    self.assertEqual(p.value("metadata.track"), 0)
    self.assertEqual(p.label("metadata.artist"), "")
    self.assertEqual(p.label("metadata.album"), "unknown")
    # pre-existing descriptors keep their value
    self.assertEqual(p.value("danceability"), dance)
def testAddField(self):
    """addfield must reject existing descriptors and honour defaults otherwise."""
    ds = testdata.loadTestDB()
    dance = ds.samplePoint().value('danceability')

    # adding a field that already exists must raise
    af = AnalyzerFactory.create('addfield',
                                {'string': ['metadata.artist', 'metadata.album'],
                                 'real': 'danceability'})
    self.assertRaises(Exception, af.analyze, ds)

    # normal operation: new fields with and without explicit defaults
    dsm = transform(ds, 'addfield',
                    {'string': ['metadata.artist', 'metadata.album'],
                     'real': ['metadata.year', 'metadata.track', 'metadata.ratings'],
                     'default': {'metadata.album': 'unknown',
                                 'metadata.year': 2012,
                                 'metadata.ratings': [12, 23, 34]}})

    p = dsm.samplePoint()
    self.assertEqual(p.value('metadata.year'), 2012)
    self.assertEqual(p.value('metadata.ratings'), (12, 23, 34))
    self.assertEqual(p.value('metadata.track'), 0)
    self.assertEqual(p.label('metadata.artist'), '')
    self.assertEqual(p.label('metadata.album'), 'unknown')
    # pre-existing descriptors keep their value
    self.assertEqual(p.value('danceability'), dance)
def testRCA(self):
    """RCA must give the same neighbors whether the groundtruth comes from a
    file (``classFile``) or from an in-memory map (``classMap``).

    Fix: the groundtruth file handle opened for the ``classMap`` variant was
    never closed; it is now read inside a ``with`` block.
    """
    ds = testdata.loadTestDB()
    ds = transform(ds, "removevl")
    ds = transform(ds, "fixlength")
    ds = transform(ds, "remove", {"descriptorNames": "*cov"})
    ds = transform(ds, "cleaner")
    ds = transform(ds, "normalize")
    ds = transform(ds, "pca", {"resultName": "pca15", "dimension": 15})

    ds_rca = transform(ds, "rca", {"resultName": "rca10",
                                   "dimension": 10,
                                   "classFile": testdata.RCA_GENRE_GT})

    v = View(ds_rca)
    dist = MetricFactory.create("euclidean", ds_rca.layout())
    self.compareResults(
        v.nnSearch("01 Cigarettes And Alcohol - Oasis.mp3", dist).get(10),
        testdata.RCA_GENRE_RESULTS)

    # try by passing directly the groundtruth map
    import gaia2.fastyaml as yaml
    with open(testdata.RCA_GENRE_GT) as gtfile:
        classMap = yaml.load(gtfile.read())
    ds_rca = transform(ds, "rca", {"resultName": "rca10",
                                   "dimension": 10,
                                   "classMap": classMap})

    v = View(ds_rca)
    dist = MetricFactory.create("euclidean", ds_rca.layout())
    self.compareResults(
        v.nnSearch("01 Cigarettes And Alcohol - Oasis.mp3", dist).get(10),
        testdata.RCA_GENRE_RESULTS)
def testRCA(self):
    """RCA must give the same neighbors whether the groundtruth comes from a
    file (``classFile``) or from an in-memory map (``classMap``).

    Fix: the groundtruth file handle opened for the ``classMap`` variant was
    never closed; it is now read inside a ``with`` block.
    """
    ds = testdata.loadTestDB()
    ds = transform(ds, 'removevl')
    ds = transform(ds, 'fixlength')
    ds = transform(ds, 'remove', {'descriptorNames': '*cov'})
    ds = transform(ds, 'cleaner')
    ds = transform(ds, 'normalize')
    ds = transform(ds, 'pca', {'resultName': 'pca15', 'dimension': 15})

    ds_rca = transform(ds, 'rca', {'resultName': 'rca10',
                                   'dimension': 10,
                                   'classFile': testdata.RCA_GENRE_GT})

    v = View(ds_rca)
    dist = MetricFactory.create('euclidean', ds_rca.layout())
    self.compareResults(
        v.nnSearch('01 Cigarettes And Alcohol - Oasis.mp3', dist).get(10),
        testdata.RCA_GENRE_RESULTS)

    # try by passing directly the groundtruth map
    import gaia2.fastyaml as yaml
    with open(testdata.RCA_GENRE_GT) as gtfile:
        classMap = yaml.load(gtfile.read())
    ds_rca = transform(ds, 'rca', {'resultName': 'rca10',
                                   'dimension': 10,
                                   'classMap': classMap})

    v = View(ds_rca)
    dist = MetricFactory.create('euclidean', ds_rca.layout())
    self.compareResults(
        v.nnSearch('01 Cigarettes And Alcohol - Oasis.mp3', dist).get(10),
        testdata.RCA_GENRE_RESULTS)
def testKullbackLeibler(self):
    """KL distance must stay correct when the view splits work across threads."""
    ds = transform(testdata.loadTestDB(), 'fixlength')

    # build a dataset with more than 1000 points, otherwise the test is
    # useless: the workload is split in chunks of 1000 points when computing
    # the distance
    dstest = DataSet()
    ncopy = 20
    for cidx in range(ncopy):
        points = list(ds.points())
        for p in points:
            p.setName(p.name() + '-%d' % cidx)
        dstest.addPoints(points)

    # KL used to break with multithreading (did in 2.2.1)
    v = View(dstest)
    dist = MetricFactory.create('kullbackleibler', dstest.layout(),
                                {'descriptorName': 'mfcc'})
    results = v.nnSearch(ds.samplePoint(), dist).get(6 * ncopy)

    expected = [0.0] * 2 * ncopy + [6.1013755798339844] * ncopy
    expected += [6.4808731079101562] * 2 * ncopy + [6.7828292846679688] * ncopy

    for r, e in zip(results, expected):
        self.assertAlmostEqual(r[1], e, 5)
def loadPreparedTestDB(self):
    """Return the test DB with variable-length and string descriptors removed."""
    ds = testdata.loadTestDB()
    for transfo in ("removevl", "fixlength"):
        ds = transform(ds, transfo)
    return transform(ds, "remove",
                     {"descriptorNames": ds.layout().descriptorNames(StringType)})
def testAddFieldFixedLength(self):
    """Check how addfield interacts with fixed-length layouts."""
    ds = testdata.loadTestDB()
    ds_fl = transform(ds, "fixlength")

    # without a size, the added field is variable-length in both cases
    ds_addvl = transform(ds, "addfield", {"real": "hello"})
    ds_fl_addvl = transform(ds_fl, "addfield", {"real": "hello"})
    self.assertEqual(ds_addvl.layout().descriptorLocation("hello").lengthType(),
                     VariableLength)
    self.assertEqual(ds_fl_addvl.layout().descriptorLocation("hello").lengthType(),
                     VariableLength)

    # fixing the length afterwards must converge to the same layout
    ds_addvl_fl = transform(ds_addvl, "fixlength")
    ds_fl_addvl_fl = transform(ds_fl_addvl, "fixlength")
    self.assertEqual(ds_addvl_fl.layout(), ds_fl_addvl_fl.layout())

    # adding directly with a size yields that same fixed-length layout
    ds_fl_addfl = transform(ds_fl, "addfield",
                            {"real": "hello", "size": {"hello": 1}})
    self.assertEqual(ds_fl_addfl.layout(), ds_fl_addvl_fl.layout())
    self.assertEqual(ds_fl_addfl[0]["hello"], 0)

    # multi-dimensional fields with explicit defaults
    ds_fl_addfl2 = transform(
        ds_fl, "addfield",
        {"real": "hello",
         "string": "youhou",
         "size": {"hello": 3, "youhou": 6},
         "default": {"hello": [2, 5, 3],
                     "youhou": ["a", "b", "c", "d", "e", "f"]}})
    self.assertEqual(ds_fl_addfl2.layout().descriptorLocation("hello").dimension(), 3)
    self.assertEqual(ds_fl_addfl2.layout().descriptorLocation("youhou").dimension(), 6)
    self.assertEqual(ds_fl_addfl2[0]["hello"], (2, 5, 3))
def testAddFieldFixedLength(self):
    """Check how addfield interacts with fixed-length layouts."""
    ds = testdata.loadTestDB()
    ds_fl = transform(ds, 'fixlength')

    # without a size, the added field is variable-length in both cases
    ds_addvl = transform(ds, 'addfield', {'real': 'hello'})
    ds_fl_addvl = transform(ds_fl, 'addfield', {'real': 'hello'})
    self.assertEqual(ds_addvl.layout().descriptorLocation('hello').lengthType(),
                     VariableLength)
    self.assertEqual(ds_fl_addvl.layout().descriptorLocation('hello').lengthType(),
                     VariableLength)

    # fixing the length afterwards must converge to the same layout
    ds_addvl_fl = transform(ds_addvl, 'fixlength')
    ds_fl_addvl_fl = transform(ds_fl_addvl, 'fixlength')
    self.assertEqual(ds_addvl_fl.layout(), ds_fl_addvl_fl.layout())

    # adding directly with a size yields that same fixed-length layout
    ds_fl_addfl = transform(ds_fl, 'addfield',
                            {'real': 'hello', 'size': {'hello': 1}})
    self.assertEqual(ds_fl_addfl.layout(), ds_fl_addvl_fl.layout())
    self.assertEqual(ds_fl_addfl[0]['hello'], 0)

    # multi-dimensional fields with explicit defaults
    ds_fl_addfl2 = transform(
        ds_fl, 'addfield',
        {'real': 'hello',
         'string': 'youhou',
         'size': {'hello': 3, 'youhou': 6},
         'default': {'hello': [2, 5, 3],
                     'youhou': ['a', 'b', 'c', 'd', 'e', 'f']}})
    self.assertEqual(ds_fl_addfl2.layout().descriptorLocation('hello').dimension(), 3)
    self.assertEqual(ds_fl_addfl2.layout().descriptorLocation('youhou').dimension(), 6)
    self.assertEqual(ds_fl_addfl2[0]['hello'], (2, 5, 3))
def testGaussianize(self):
    """Gaussianize must map the known bpm value to its known transformed value."""
    ds = testdata.loadTestDB()
    for transfo in ('removevl', 'fixlength'):
        ds = transform(ds, transfo)
    # reference value before the transformation
    self.assertEqual(ds[0]['tempotap_bpm'], 104.28208160400391)
    ds = transform(ds, 'gaussianize')
    # and after
    self.assertEqual(ds[0]['tempotap_bpm'], -0.1928621232509613)
def testGaussianize(self):
    """Gaussianize must map the known bpm value to its known transformed value."""
    ds = testdata.loadTestDB()
    for transfo in ("removevl", "fixlength"):
        ds = transform(ds, transfo)
    # reference value before the transformation
    self.assertEqual(ds[0]["tempotap_bpm"], 104.28208160400391)
    ds = transform(ds, "gaussianize")
    # and after
    self.assertEqual(ds[0]["tempotap_bpm"], -0.1928621232509613)
def testQt46FloatParameterBug(self):
    """Regression test: Qt 4.6 introduced a QVariant(float) constructor, which
    made pmapToPython fail with an unknown type error (followed by a segfault).
    """
    ds = testdata.loadTestDB()
    for transfo in ('fixlength', 'removevl', 'normalize'):
        ds = transform(ds, transfo)
    coeffs = ds.history().toPython()[-1]['Applier parameters']['coeffs']
    self.assertEqual(coeffs['.barkbands.mean']['a'][0], 24.922689437866211)
def loadPreparedTestDB(self):
    """Return the test DB with variable-length and string descriptors removed."""
    ds = testdata.loadTestDB()
    for transfo in ('removevl', 'fixlength'):
        ds = transform(ds, transfo)
    return transform(ds, 'remove',
                     {'descriptorNames': ds.layout().descriptorNames(StringType)})
def testQt46FloatParameterBug(self):
    """Regression test: Qt 4.6 introduced a QVariant(float) constructor, which
    made pmapToPython fail with an unknown type error (followed by a segfault).
    """
    ds = testdata.loadTestDB()
    for transfo in ('fixlength', 'removevl', 'normalize'):
        ds = transform(ds, transfo)
    applier_params = ds.history().toPython()[-1]['Applier parameters']
    self.assertEqual(applier_params['coeffs']['.barkbands.mean']['a'][0],
                     24.922689437866211)
def testEnumerateThenFixLength(self):
    """Labels must survive enumerate followed by fixlength."""
    db = testdata.loadTestDB()
    dbe = testdata.enumerateStrings(db,
                                    exclude=testdata.TEST_DATABASE_VARLENGTH_STRING)
    dbefl = testdata.fixLength(dbe)
    for p in db.points():
        pe = dbefl.point(p.name())
        for desc in ("key_key", "key_mode"):
            self.assertEqual(p.label(desc), pe.label(desc))
def testSVM2(self):
    """Pre-trained SVM histories must reproduce the stored groundtruth
    predictions.

    Fixes: ``assertEquals`` is a deprecated alias removed in Python 3.12
    (replaced by ``assertEqual``); the groundtruth file handle was never
    closed (now read inside a ``with`` block).
    """
    import yaml

    # TODO: need to add a test for text descriptors (ie: key*, ...)
    for t in ['singledesc', 'multidimdesc', 'all', 'probability']:
        history = TransfoChain()
        history.load('data/svm/test_svm_%s.history' % t)
        ds = testdata.loadTestDB()
        ds = history.mapDataSet(ds)
        with open('data/svm/test_svm_%s.gt.yaml' % t) as gtfile:
            # NOTE(review): yaml.load without an explicit Loader warns on
            # PyYAML >= 5.1; safe_load would be preferable if the groundtruth
            # files contain no custom tags — confirm before changing.
            gt = yaml.load(gtfile.read())
        for p in ds.points():
            self.assertEqual(p['genre'], gt[p.name()])
def testEnumerateThenFixLength(self):
    """Labels must survive enumerate followed by fixlength."""
    db = testdata.loadTestDB()
    dbe = testdata.enumerateStrings(db,
                                    exclude=testdata.TEST_DATABASE_VARLENGTH_STRING)
    dbefl = testdata.fixLength(dbe)
    for p in db.points():
        pe = dbefl.point(p.name())
        for desc in ('key_key', 'key_mode'):
            self.assertEqual(p.label(desc), pe.label(desc))
def testParsedVsConstructedFilters(self):
    """A filter parsed from a WHERE string must match the hand-built Filter tree."""
    ds = testdata.loadTestDB()
    ds = transform(ds, 'fixlength')
    p = ds.samplePoint()
    p2 = ds.point('Higher State of Consciousness.mp3')

    # (query point, WHERE string, equivalent constructed filter)
    queries = [
        (p, '', ''),
        (p2, '', ''),
        (p2, 'WHERE value.tempotap_bpm.value > 140',
         Filter('tempotap_bpm.value', '>', 140)),
        (p, 'WHERE value.tempotap_bpm > 110', Filter('tempotap_bpm', '>', 110)),
        (p, 'WHERE value.tempotap_bpm > -10', Filter('tempotap_bpm', '>', -10)),
        (p, 'WHERE value.tempotap_bpm > 23000', Filter('tempotap_bpm', '>', 23000)),
        (p, 'WHERE value.tempotap_bpm > 120 AND value.tempotap_bpm < 130',
         AndFilter([Filter('tempotap_bpm', '>', 120),
                    Filter('tempotap_bpm', '<', 130)])),
        (p, 'WHERE value.tempotap_bpm BETWEEN 130 AND 120',
         Filter('tempotap_bpm', 'between', [130, 120])),
        (p, 'WHERE label.key_key = "C"', Filter('key_key', '==', 'C')),
        (p2,
         '''WHERE ((label.key_key = "A" AND label.key_mode = "major") OR (label.key_key = "E" AND label.key_mode = "minor")) AND value.tempotap_bpm < 90''',
         AndFilter([
             OrFilter([
                 AndFilter([Filter('key_key', '==', 'A'),
                            Filter('key_mode', '==', 'major')]),
                 AndFilter([Filter('key_key', '==', 'E'),
                            Filter('key_mode', '==', 'minor')])
             ]),
             Filter('tempotap_bpm', '<', 90)
         ]))
    ]

    dist = MetricFactory.create('euclidean', ds.layout(),
                                {'descriptorNames': '*.mean'})
    v = View(ds)

    for (pt, filtstr, filt) in queries:
        self.assertEqualSearchSpace(v.nnSearch(pt, dist, filtstr),
                                    v.nnSearch(pt, dist, filt))
def testSVM2(self):
    """Pre-trained SVM histories must reproduce the stored groundtruth
    predictions.

    Fixes: ``assertEquals`` is a deprecated alias removed in Python 3.12
    (replaced by ``assertEqual``); the groundtruth file handle was never
    closed (now read inside a ``with`` block).
    """
    import yaml

    # TODO: need to add a test for text descriptors (ie: key*, ...)
    for t in ["singledesc", "multidimdesc", "all", "probability"]:
        history = TransfoChain()
        history.load("data/svm/test_svm_%s.history" % t)
        ds = testdata.loadTestDB()
        ds = history.mapDataSet(ds)
        with open("data/svm/test_svm_%s.gt.yaml" % t) as gtfile:
            # NOTE(review): yaml.load without an explicit Loader warns on
            # PyYAML >= 5.1; safe_load would be preferable if the groundtruth
            # files contain no custom tags — confirm before changing.
            gt = yaml.load(gtfile.read())
        for p in ds.points():
            self.assertEqual(p["genre"], gt[p.name()])
def testWrongArgument(self):
    """Transformations must raise on missing, misspelled or empty parameters."""
    ds = testdata.loadTestDB()
    for transfo in ('fixlength', 'removevl', 'cleaner', 'normalize'):
        ds = transform(ds, transfo)

    # missing param: className
    self.assertRaises(Exception, transform, ds, 'svmtrain',
                      {'descriptorNames': '*.mean'})

    # wrong param: descriptorName
    self.assertRaises(Exception, transform, ds, 'svmtrain',
                      {'className': 'kloug', 'descriptorName': '*.mean'})

    # missing param: resultName (empty string)
    self.assertRaises(Exception, transform, ds, 'pca',
                      {'dimension': 15, 'resultName': ''})
def testDoesntBlowup(self):
    """Chaining transformations while dropping intermediate datasets must not crash."""
    ds = testdata.loadTestDB()
    ignored_descs = testdata.TEST_DATABASE_VARLENGTH_REAL

    ds = transform(ds, 'fixlength', {'except': ignored_descs})
    dsc = transform(ds, 'cleaner', {'except': ignored_descs})
    dsr = transform(dsc, 'remove', {'descriptorNames': '*mfcc*'})
    dsr2 = transform(dsc, 'remove', {'descriptorNames': ['*mfcc*']})

    # drop intermediate datasets; the survivors must stay usable
    del dsr2
    del dsc
    del ds

    dsn = transform(dsr, 'normalize', {'except': ignored_descs})
    dspca = transform(dsn, 'pca', {'resultName': 'pca30',
                                   'dimension': 30,
                                   'descriptorNames': ['*.mean', '*.var']})
def testDeleteUnderlyingDataSet(self):
    """A View must keep its dataset alive after the caller drops its reference."""
    ds = testdata.loadTestDB()

    params = {'descriptorNames': ['*.mean', '*.var']}
    for transfo in ('fixlength', 'cleaner', 'normalize'):
        ds = transform(ds, transfo, params)

    dist = MetricFactory.create('euclidean', ds.layout(), params)
    v = View(ds)
    del ds

    # this used to throw; it doesn't anymore, as the View keeps a ref to the
    # dataset, so the search must simply succeed
    v.nnSearch('01 Respect.mp3', dist)
def testSubspaceSearch(self):
    """Searching inside a previous result set must equal the combined filter."""
    ds = transform(testdata.loadTestDB(), 'fixlength')
    dist = MetricFactory.create('euclidean', ds.layout(),
                                {'descriptorNames': '*.mean'})
    v = View(ds)
    pid = 'Higher State of Consciousness.mp3'

    key_a_minor = v.nnSearch(pid, dist,
                             'WHERE label.key_key = "A" AND label.key_mode = "minor"')
    key_a = v.nnSearch(pid, dist, 'WHERE label.key_key = "A"')
    key_minor = v.nnSearch(pid, dist, 'WHERE label.key_mode = "minor"')

    # restricting one single-condition result set by the other condition
    # must yield the same search space as the combined filter
    key_a_minor_sspace1 = v.nnSearch(pid, key_minor, dist,
                                     'WHERE label.key_key = "A"')
    key_a_minor_sspace2 = v.nnSearch(pid, key_a, dist,
                                     'WHERE label.key_mode = "minor"')

    self.assertEqualSearchSpace(key_a_minor, key_a_minor_sspace1)
    self.assertEqualSearchSpace(key_a_minor, key_a_minor_sspace2)
def testDeleteUnderlyingDataSet(self):
    """A View must keep its dataset alive after the caller drops its reference."""
    ds = testdata.loadTestDB()

    params = {'descriptorNames': ['*.mean', '*.var']}
    for transfo in ('fixlength', 'cleaner', 'normalize'):
        ds = transform(ds, transfo, params)

    metric = MetricFactory.create('euclidean', ds.layout(), params)
    view = View(ds)
    del ds

    # this used to throw; it doesn't anymore, as the View keeps a ref to the
    # dataset, so the search must simply succeed
    view.nnSearch('01 Respect.mp3', metric)
def testDoesntBlowup(self):
    """Chaining transformations while dropping intermediate datasets must not crash."""
    ds = testdata.loadTestDB()
    ignored_descs = testdata.TEST_DATABASE_VARLENGTH_REAL

    ds = transform(ds, 'fixlength', {'except': ignored_descs})
    dsc = transform(ds, 'cleaner', {'except': ignored_descs})
    dsr = transform(dsc, 'remove', {'descriptorNames': '*mfcc*'})
    dsr2 = transform(dsc, 'remove', {'descriptorNames': ['*mfcc*']})

    # drop intermediate datasets; the survivors must stay usable
    del dsr2
    del dsc
    del ds

    dsn = transform(dsr, 'normalize', {'except': ignored_descs})
    dspca = transform(dsn, 'pca',
                      {'resultName': 'pca30',
                       'dimension': 30,
                       'descriptorNames': ['*.mean', '*.var']})
def testWrongArgument(self):
    """Transformations must raise on missing, misspelled or empty parameters."""
    ds = testdata.loadTestDB()
    for transfo in ('fixlength', 'removevl', 'cleaner', 'normalize'):
        ds = transform(ds, transfo)

    # missing param: className
    self.assertRaises(Exception, transform, ds, 'svmtrain',
                      {'descriptorNames': '*.mean'})

    # wrong param: descriptorName
    self.assertRaises(Exception, transform, ds, 'svmtrain',
                      {'className': 'kloug', 'descriptorName': '*.mean'})

    # missing param: resultName (empty string)
    self.assertRaises(Exception, transform, ds, 'pca',
                      {'dimension': 15, 'resultName': ''})
def testParsedVsConstructedFilters(self):
    """A filter parsed from a WHERE string must match the hand-built Filter tree."""
    ds = testdata.loadTestDB()
    ds = transform(ds, 'fixlength')
    p = ds.samplePoint()
    p2 = ds.point('Higher State of Consciousness.mp3')

    # (query point, WHERE string, equivalent constructed filter)
    queries = [
        (p, '', ''),
        (p2, '', ''),
        (p2, 'WHERE value.tempotap_bpm.value > 140',
         Filter('tempotap_bpm.value', '>', 140)),
        (p, 'WHERE value.tempotap_bpm > 110', Filter('tempotap_bpm', '>', 110)),
        (p, 'WHERE value.tempotap_bpm > -10', Filter('tempotap_bpm', '>', -10)),
        (p, 'WHERE value.tempotap_bpm > 23000', Filter('tempotap_bpm', '>', 23000)),
        (p, 'WHERE value.tempotap_bpm > 120 AND value.tempotap_bpm < 130',
         AndFilter([Filter('tempotap_bpm', '>', 120),
                    Filter('tempotap_bpm', '<', 130)])),
        (p, 'WHERE value.tempotap_bpm BETWEEN 130 AND 120',
         Filter('tempotap_bpm', 'between', [130, 120])),
        (p, 'WHERE label.key_key = "C"', Filter('key_key', '==', 'C')),
        (p2,
         '''WHERE ((label.key_key = "A" AND label.key_mode = "major") OR (label.key_key = "E" AND label.key_mode = "minor")) AND value.tempotap_bpm < 90''',
         AndFilter([
             OrFilter([
                 AndFilter([Filter('key_key', '==', 'A'),
                            Filter('key_mode', '==', 'major')]),
                 AndFilter([Filter('key_key', '==', 'E'),
                            Filter('key_mode', '==', 'minor')])
             ]),
             Filter('tempotap_bpm', '<', 90)
         ]))
    ]

    dist = MetricFactory.create('euclidean', ds.layout(),
                                {'descriptorNames': '*.mean'})
    v = View(ds)

    for (pt, filtstr, filt) in queries:
        self.assertEqualSearchSpace(v.nnSearch(pt, dist, filtstr),
                                    v.nnSearch(pt, dist, filt))
def testCollectionMultipleAdd(self):
    """addPoint in a loop, addPoints(dataset) and addPoints(list) must all be
    equivalent.

    Fix: ``assertEquals`` is a deprecated alias removed in Python 3.12
    (replaced by ``assertEqual``); the identity list comprehension is
    replaced by the idiomatic ``list(...)``.
    """
    d = testdata.loadTestDB()
    d1 = DataSet()
    d2 = DataSet()
    d3 = DataSet()

    # add them one by one
    for p in d.points():
        d1.addPoint(p)

    # add them all at once
    d2.addPoints(d)

    # add them all at once using a list of points
    d3.addPoints(list(d.points()))

    self.assertEqual(d1.size(), d2.size())
    for p1, p2 in zip(d1.points(), d2.points()):
        self.assertEqual(p1, p2)

    self.assertEqual(d1.size(), d3.size())
    for p1, p3 in zip(d1.points(), d3.points()):
        self.assertEqual(p1, p3)
def testCollectionMultipleAdd(self):
    """addPoint in a loop, addPoints(dataset) and addPoints(list) must all be
    equivalent.

    Fix: ``assertEquals`` is a deprecated alias removed in Python 3.12
    (replaced by ``assertEqual``); the identity list comprehension is
    replaced by the idiomatic ``list(...)``.
    """
    d = testdata.loadTestDB()
    d1 = DataSet()
    d2 = DataSet()
    d3 = DataSet()

    # add them one by one
    for p in d.points():
        d1.addPoint(p)

    # add them all at once
    d2.addPoints(d)

    # add them all at once using a list of points
    d3.addPoints(list(d.points()))

    self.assertEqual(d1.size(), d2.size())
    for p1, p2 in zip(d1.points(), d2.points()):
        self.assertEqual(p1, p2)

    self.assertEqual(d1.size(), d3.size())
    for p1, p3 in zip(d1.points(), d3.points()):
        self.assertEqual(p1, p3)
def testAppendDataSet(self):
    """appendDataSet must reject heterogeneous/duplicate inputs and otherwise
    append all points.

    Fix: ``self.assert_`` is a deprecated alias removed in Python 3.12
    (replaced by ``assertTrue``).
    """
    ds = testdata.loadTestDB()
    ds2 = self.createSequentialDataSet()

    # check homogeneous collections
    self.assertRaises(Exception, ds.appendDataSet, ds2)

    # check no duplicates
    self.assertRaises(Exception, ds2.appendDataSet, ds2)

    # check normal operation of appendCollection: prefix copies of the points
    # with 'p' so the two halves can be matched pairwise
    ppoints = [Point(p) for p in ds2.points()]
    for p in ppoints:
        p.setName('p' + p.name())

    ds3 = DataSet()
    ds3.addPoints(ppoints)
    ds3.appendDataSet(ds2)

    for p in ds3.points():
        if p.name()[0] == 'p':
            self.assertTrue(p == ds3.point(p.name()[1:]))
        else:
            self.assertTrue(p == ds3.point('p' + p.name()))
def testAppendDataSet(self):
    """appendDataSet must reject heterogeneous/duplicate inputs and otherwise
    append all points.

    Fix: ``self.assert_`` is a deprecated alias removed in Python 3.12
    (replaced by ``assertTrue``).
    """
    ds = testdata.loadTestDB()
    ds2 = self.createSequentialDataSet()

    # check homogeneous collections
    self.assertRaises(Exception, ds.appendDataSet, ds2)

    # check no duplicates
    self.assertRaises(Exception, ds2.appendDataSet, ds2)

    # check normal operation of appendCollection: prefix copies of the points
    # with 'p' so the two halves can be matched pairwise
    ppoints = [Point(p) for p in ds2.points()]
    for p in ppoints:
        p.setName('p' + p.name())

    ds3 = DataSet()
    ds3.addPoints(ppoints)
    ds3.appendDataSet(ds2)

    for p in ds3.points():
        if p.name()[0] == 'p':
            self.assertTrue(p == ds3.point(p.name()[1:]))
        else:
            self.assertTrue(p == ds3.point('p' + p.name()))
def testRegressionIndexing(self):
    """Indexed and unindexed views must return identical search spaces, both
    for standard and for frozen datasets."""
    ds = testdata.loadTestDB()
    ds = transform(ds, 'removevl')
    ds = transform(ds, 'fixlength')
    p = ds.samplePoint().name()
    p2 = 'Higher State of Consciousness.mp3'

    queries = [
        (p, ''),
        (p2, ''),
        (p, 'WHERE (True AND True) and (true and TRUE)'),
        (p, 'WHERE (false AND True) OR (true and false)'),
        (p2, 'WHERE value.tempotap_bpm.value > 140'),
        (p, 'WHERE true AND value.tempotap_bpm.value > 140'),
        (p, 'WHERE value.tempotap_bpm > 110'),
        (p, 'WHERE value.tempotap_bpm > -10'),
        (p, 'WHERE value.tempotap_bpm < -10'),
        (p, 'WHERE value.tempotap_bpm > 23000'),
        (p, 'WHERE value.tempotap_bpm < 23000'),
        (p, 'WHERE value.tempotap_bpm > 120 AND value.tempotap_bpm < 130'),
        (p, 'WHERE value.tempotap_bpm BETWEEN 120 AND 130'),
        (p, 'WHERE value.tempotap_bpm BETWEEN 130 AND 120'),
        (p, 'WHERE value.tempotap_bpm BETWEEN 120 AND 120'),
        (p, 'WHERE value.tempotap_bpm BETWEEN -2.3 AND 4096'),
        (p, "WHERE value.tempotap_bpm BETWEEN -2.3 AND -1.4"),
        (p, "WHERE value.tempotap_bpm BETWEEN 2048 AND 4096"),
        (p, 'WHERE label.key_key = "C"'),
        (p, 'WHERE label.key_key != "C"'),
        (p, 'WHERE label.key_key = "X"'),
        (p, 'WHERE label.key_key != "X"'),
        (p, 'WHERE label.key_key != "C" AND label.key_mode != "major"'),
        (p2, '''WHERE ((label.key_key = "A" AND label.key_mode = "major") OR (label.key_key = "E" AND label.key_mode = "minor")) AND value.tempotap_bpm < 90''')
    ]

    # standard views, with and without indexes
    dist = MetricFactory.create('euclidean', ds.layout(),
                                {'descriptorNames': '*.mean'})
    v = View(ds)
    vidx = View(ds)
    for desc in ('tempotap_bpm', 'key_key', 'key_mode'):
        vidx.indexOn(desc)

    for (pt, filt) in queries:
        self.assertEqualSearchSpace(v.nnSearch(pt, dist, filt),
                                    vidx.nnSearch(pt, dist, filt))

    # frozen views, with and without indexes
    dsr = transform(ds, 'select',
                    {'descriptorNames': ds.layout().descriptorNames(RealType)})
    dsnorm = transform(dsr, 'normalize')
    dspca = transform(dsnorm, 'pca', {'resultName': 'pca', 'dimension': 25})
    fds = FrozenDataSet()
    fds.fromDataSet(dspca)
    fds.setReferenceDataSet(ds)
    dist = FrozenDistanceFactory.create('Euclidean', fds.layout(),
                                        {'descriptorName': 'pca'})

    v = FrozenView(fds)
    vidx = FrozenView(fds)
    for desc in ('tempotap_bpm', 'key_key', 'key_mode'):
        vidx.indexOn(desc)

    for (pt, filt) in queries:
        self.assertEqualSearchSpace(v.nnSearch(pt, dist, filt),
                                    vidx.nnSearch(pt, dist, filt))
def testRegressionIndexing(self):
    """Indexed and unindexed views must return identical search spaces, both
    for standard and for frozen datasets."""
    ds = testdata.loadTestDB()
    ds = transform(ds, 'removevl')
    ds = transform(ds, 'fixlength')
    p = ds.samplePoint().name()
    p2 = 'Higher State of Consciousness.mp3'

    queries = [
        (p, ''),
        (p2, ''),
        (p, 'WHERE (True AND True) and (true and TRUE)'),
        (p, 'WHERE (false AND True) OR (true and false)'),
        (p2, 'WHERE value.tempotap_bpm.value > 140'),
        (p, 'WHERE true AND value.tempotap_bpm.value > 140'),
        (p, 'WHERE value.tempotap_bpm > 110'),
        (p, 'WHERE value.tempotap_bpm > -10'),
        (p, 'WHERE value.tempotap_bpm < -10'),
        (p, 'WHERE value.tempotap_bpm > 23000'),
        (p, 'WHERE value.tempotap_bpm < 23000'),
        (p, 'WHERE value.tempotap_bpm > 120 AND value.tempotap_bpm < 130'),
        (p, 'WHERE value.tempotap_bpm BETWEEN 120 AND 130'),
        (p, 'WHERE value.tempotap_bpm BETWEEN 130 AND 120'),
        (p, 'WHERE value.tempotap_bpm BETWEEN 120 AND 120'),
        (p, 'WHERE value.tempotap_bpm BETWEEN -2.3 AND 4096'),
        (p, "WHERE value.tempotap_bpm BETWEEN -2.3 AND -1.4"),
        (p, "WHERE value.tempotap_bpm BETWEEN 2048 AND 4096"),
        (p, 'WHERE label.key_key = "C"'),
        (p, 'WHERE label.key_key != "C"'),
        (p, 'WHERE label.key_key = "X"'),
        (p, 'WHERE label.key_key != "X"'),
        (p, 'WHERE label.key_key != "C" AND label.key_mode != "major"'),
        (p2, '''WHERE ((label.key_key = "A" AND label.key_mode = "major") OR (label.key_key = "E" AND label.key_mode = "minor")) AND value.tempotap_bpm < 90''')
    ]

    # standard views, with and without indexes
    dist = MetricFactory.create('euclidean', ds.layout(),
                                {'descriptorNames': '*.mean'})
    v = View(ds)
    vidx = View(ds)
    for desc in ('tempotap_bpm', 'key_key', 'key_mode'):
        vidx.indexOn(desc)

    for (pt, filt) in queries:
        self.assertEqualSearchSpace(v.nnSearch(pt, dist, filt),
                                    vidx.nnSearch(pt, dist, filt))

    # frozen views, with and without indexes
    dsr = transform(ds, 'select',
                    {'descriptorNames': ds.layout().descriptorNames(RealType)})
    dsnorm = transform(dsr, 'normalize')
    dspca = transform(dsnorm, 'pca', {'resultName': 'pca', 'dimension': 25})
    fds = FrozenDataSet()
    fds.fromDataSet(dspca)
    fds.setReferenceDataSet(ds)
    dist = FrozenDistanceFactory.create('Euclidean', fds.layout(),
                                        {'descriptorName': 'pca'})

    v = FrozenView(fds)
    vidx = FrozenView(fds)
    for desc in ('tempotap_bpm', 'key_key', 'key_mode'):
        vidx.indexOn(desc)

    for (pt, filt) in queries:
        self.assertEqualSearchSpace(v.nnSearch(pt, dist, filt),
                                    vidx.nnSearch(pt, dist, filt))