def testRCA(self):
    '''Check RCA nearest-neighbour results for both ways of passing the groundtruth.

    The same preprocessed dataset goes through the 'rca' transformation twice:
    first with the groundtruth given as a file name ('classFile'), then with
    the groundtruth given as an already loaded map ('classMap'). In both cases
    the 10 nearest neighbours of a fixed query track are checked against
    testdata.RCA_GENRE_RESULTS through self.compareResults.
    '''
    query = '01 Cigarettes And Alcohol - Oasis.mp3'

    def checkNeighbours(ds_rca):
        # run the reference query on the RCA-transformed dataset and compare
        # the result with the stored expected values
        v = View(ds_rca)
        dist = MetricFactory.create('euclidean', ds_rca.layout())
        self.compareResults(v.nnSearch(query, dist).get(10),
                            testdata.RCA_GENRE_RESULTS)

    ds = testdata.loadTestDB()
    ds = transform(ds, 'removevl')
    ds = transform(ds, 'fixlength')
    ds = transform(ds, 'remove', { 'descriptorNames': '*cov' })
    ds = transform(ds, 'cleaner')
    ds = transform(ds, 'normalize')
    ds = transform(ds, 'pca', { 'resultName': 'pca15', 'dimension': 15 })

    # groundtruth given as a file name
    ds_rca = transform(ds, 'rca', { 'resultName': 'rca10',
                                    'dimension': 10,
                                    'classFile': testdata.RCA_GENRE_GT })
    checkNeighbours(ds_rca)

    # try by passing directly the groundtruth map
    import gaia2.fastyaml as yaml
    # read the groundtruth inside a context manager so the file handle is
    # closed deterministically instead of being left to the garbage collector
    with open(testdata.RCA_GENRE_GT) as gtfile:
        classMap = yaml.load(gtfile.read())

    ds_rca = transform(ds, 'rca', { 'resultName': 'rca10',
                                    'dimension': 10,
                                    'classMap': classMap })
    checkNeighbours(ds_rca)
def testKullbackLeibler(self):
    '''Run a Kullback-Leibler nn-search on a dataset made of 20 renamed copies of the test DB.'''
    ds = transform(testdata.loadTestDB(), 'fixlength')

    # The distance computation splits its workload in chunks of 1000 points,
    # so the test is only meaningful with more than 1000 points: add the
    # same points several times, under a different name for each copy.
    ncopy = 20
    dstest = DataSet()
    for cidx in range(ncopy):
        points = list(ds.points())
        for pt in points:
            pt.setName(pt.name() + '-%d' % cidx)
        dstest.addPoints(points)

    # make sure KL doesn't break with multithreading (it did in 2.2.1)
    view = View(dstest)
    dist = MetricFactory.create('kullbackleibler',
                                dstest.layout(),
                                { 'descriptorName': 'mfcc' })
    results = view.nnSearch(ds.samplePoint(), dist).get(6 * ncopy)

    # expected distances, grouped by value
    expected = ([ 0.0 ] * 2 * ncopy
                + [ 6.1013755798339844 ] * ncopy
                + [ 6.4808731079101562 ] * 2 * ncopy
                + [ 6.7828292846679688 ] * ncopy)

    for found, wanted in zip(results, expected):
        self.assertAlmostEqual(found[1], wanted, 5)
def testAngleDistance(self):
    '''Check the 'CosineAngle' distance on known point pairs, with and without a default value.'''
    ds = transform(createDataSet(), 'fixlength')
    dist = MetricFactory.create('CosineAngle', ds.layout())

    # (first point, second point, expected distance)
    expectedDistances = (
        ('p1', 'p1', 0.0),
        ('p1', 'p2', 0.5),
        ('p1', 'p3', 1.0),
        ('p1', 'p4', 0.25),
        ('p1', 'p5', 0.25),
        ('p4', 'p5', 0.5),
    )
    for first, second, wanted in expectedDistances:
        self.assertEqual(dist(ds.point(first), ds.point(second)), wanted)

    # without a default value, the distance involving 'p0' has to raise...
    self.assertRaises(Exception, dist, ds.point('p0'), ds.point('p1'))

    # ...while with one, that default value is what we get back
    distWithDefault = MetricFactory.create('CosineAngle',
                                           ds.layout(),
                                           { 'defaultValue': 0.5 })
    self.assertEqual(distWithDefault(ds.point('p0'), ds.point('p1')), 0.5)
def testExponentialCompress(self):
    '''Check 'ExponentialCompress' wrapped around the euclidean distance on known pairs.'''
    ds = transform(createDataSet(), 'fixlength')
    compressed = MetricFactory.create('ExponentialCompress',
                                      ds.layout(),
                                      { 'distance': 'euclidean' })

    # a point compared with itself
    self.assertEqual(compressed(ds.point('p1'), ds.point('p1')), 0.0)

    # expected values are 1-exp(-1) and 1-exp(-2) respectively
    self.assertAlmostEqual(compressed(ds.point('p1'), ds.point('p0')), 0.63212056)
    self.assertAlmostEqual(compressed(ds.point('p1'), ds.point('p3')), 0.86466472)
def evaluate_1NN(dataset, groundTruth, distance, params=None):
    '''Evaluate a nearest-neighbour classifier on a dataset.

    A View is built on `dataset` with the metric created from `distance` and
    `params`; the classifier handed to `evaluate` delegates to `search`.

    NOTE(review): `search` and `evaluate` are resolved from the enclosing
    module, they are not defined in this function.

    Args:
        dataset: the dataset to classify; also used as the search space.
        groundTruth: the reference classification, forwarded to `evaluate`.
        distance: name of the metric to create with MetricFactory.
        params: parameters of the metric; defaults to an empty dict.

    Returns:
        The value returned by `evaluate` (named `confusion` in the original code).
    '''
    from gaia2 import View, MetricFactory

    # use a fresh dict on each call rather than a mutable default argument,
    # which would be shared between all the calls
    if params is None:
        params = {}

    view = View(dataset, MetricFactory.create(distance, dataset.layout(), params))

    def classifier(p):
        return search(dataset, view, p)

    return evaluate(dataset, classifier, groundTruth)
def evaluate_1NN(dataset, groundTruth, distance, params=None):
    '''Run `evaluate` on `dataset` with a classifier backed by a nearest-neighbour View.

    NOTE(review): both `search` and `evaluate` come from the enclosing
    module's namespace; their exact contract is not visible from here.

    Args:
        dataset: dataset used both as search space and as evaluation set.
        groundTruth: reference classification, passed through to `evaluate`.
        distance: name of the metric, as understood by MetricFactory.create.
        params: metric parameters; an empty dict is used when omitted.

    Returns:
        Whatever `evaluate` returns for the given dataset, classifier and
        groundtruth.
    '''
    from gaia2 import View, MetricFactory

    # `params={}` in the signature would be one dict shared by every call;
    # create the empty default here instead
    metricParams = {} if params is None else params

    metric = MetricFactory.create(distance, dataset.layout(), metricParams)
    view = View(dataset, metric)

    def classifier(p):
        return search(dataset, view, p)

    confusion = evaluate(dataset, classifier, groundTruth)
    return confusion
def train_1NN(dataset, groundTruth, distance, params, dropBestResult=False):
    '''Build a nearest-neighbour classifier function over `dataset`.

    The returned callable takes a query, looks up its 2 nearest neighbours in
    a View created with the given metric, picks the first one (or the second
    one when `dropBestResult` is set) and returns the groundtruth entry of
    that point, converted to a string.
    '''
    view = View(dataset, MetricFactory.create(distance, dataset.layout(), params))

    # position, in the result list, of the neighbour used for the answer
    if dropBestResult:
        resultIndex = 1
    else:
        resultIndex = 0

    def classifier(p):
        neighbours = view.nnSearch(p).get(2)
        similarPoint = dataset.point(neighbours[resultIndex][0])
        return str(groundTruth[similarPoint.name()])

    return classifier
def train_1NN(dataset, groundTruth, distance, params, dropBestResult = False):
    '''Return a function classifying a query by the groundtruth of its nearest neighbour.

    When `dropBestResult` is true the second best search result is used
    instead of the best one.
    '''
    # index of the search result whose groundtruth is returned
    resultIndex = 1 if dropBestResult else 0

    metric = MetricFactory.create(distance, dataset.layout(), params)
    view = View(dataset, metric)

    def classify(query):
        # only the first two results are ever needed
        pointName = view.nnSearch(query).get(2)[resultIndex][0]
        nearest = dataset.point(pointName)
        label = groundTruth[nearest.name()]
        return str(label)

    return classify
def testForceIdentity(self):
    '''Check that 'forceidentity' returns 0 for a point compared with itself.

    The wrapped 'cosinesimilarity' distance is configured to return its
    default value (0.5) on the test point; wrapped in 'forceidentity' it must
    return 0.0 when both arguments are the same point, and still 0.5 for a
    renamed copy of that point.
    '''
    l = PointLayout()
    l.add('a', RealType, FixedLength, 1)
    p = Point()
    p.setLayout(l)

    # assertEqual instead of assertEquals: the latter is a deprecated alias
    # which has been removed from unittest in Python 3.12
    cd = MetricFactory.create('cosinesimilarity', p.layout(), { 'defaultValue': 0.5 })
    self.assertEqual(cd(p, p), 0.5)

    ficd = MetricFactory.create('forceidentity',
                                p.layout(),
                                { 'distance': 'cosinesimilarity',
                                  'params': { 'defaultValue': 0.5 } })
    self.assertEqual(ficd(p, p), 0.0)

    # a copy with a different name is not the same point anymore
    p2 = Point(p)
    p2.setName('p2')
    self.assertEqual(ficd(p, p2), 0.5)
def testParserStillInValidStateAfterParserError(self):
    '''ticket #20: parser is in invalid state after parser error

    Runs a valid filter, then a malformed one, then the valid one again: the
    last search must still work after the parsing failure.
    '''
    ds = testdata.createSimpleDataSet()
    dist = MetricFactory.create('null', ds.layout())
    v = View(ds)

    # a first search with a valid filter
    v.nnSearch(ds.samplePoint(), dist, 'WHERE true').get(1)

    clause = 'WHERE label.tonal_key_mode.value = \\"major"'
    try:
        v.nnSearch(ds.samplePoint(), dist, clause).get(1)
    except Exception:
        # filter correctly failed to compile; only catch Exception so that
        # KeyboardInterrupt and SystemExit are not swallowed by the test
        pass

    # the same valid filter as before has to be usable again
    v.nnSearch(ds.samplePoint(), dist, 'WHERE true').get(1)
def testParsedVsConstructedFilters(self):
    '''Check that a filter given as a query string and the same filter built
    from Filter/AndFilter/OrFilter objects select the same search space.'''
    ds = transform(testdata.loadTestDB(), 'fixlength')

    p = ds.samplePoint()
    p2 = ds.point('Higher State of Consciousness.mp3')

    # object counterpart of the last, most complex, query string
    aMajor = AndFilter([ Filter('key_key', '==', 'A'),
                         Filter('key_mode', '==', 'major') ])
    eMinor = AndFilter([ Filter('key_key', '==', 'E'),
                         Filter('key_mode', '==', 'minor') ])
    keyAndTempo = AndFilter([ OrFilter([ aMajor, eMinor ]),
                              Filter('tempotap_bpm', '<', 90) ])

    # (query point, filter as a string, filter as an object)
    queries = [
        (p, '', ''),
        (p2, '', ''),
        (p2, 'WHERE value.tempotap_bpm.value > 140',
             Filter('tempotap_bpm.value', '>', 140)),
        (p, 'WHERE value.tempotap_bpm > 110',
            Filter('tempotap_bpm', '>', 110)),
        (p, 'WHERE value.tempotap_bpm > -10',
            Filter('tempotap_bpm', '>', -10)),
        (p, 'WHERE value.tempotap_bpm > 23000',
            Filter('tempotap_bpm', '>', 23000)),
        (p, 'WHERE value.tempotap_bpm > 120 AND value.tempotap_bpm < 130',
            AndFilter([ Filter('tempotap_bpm', '>', 120),
                        Filter('tempotap_bpm', '<', 130) ])),
        (p, 'WHERE value.tempotap_bpm BETWEEN 130 AND 120',
            Filter('tempotap_bpm', 'between', [130, 120])),
        (p, 'WHERE label.key_key = "C"',
            Filter('key_key', '==', 'C')),
        (p2, '''WHERE ((label.key_key = "A" AND label.key_mode = "major") OR (label.key_key = "E" AND label.key_mode = "minor")) AND value.tempotap_bpm < 90''',
             keyAndTempo),
    ]

    dist = MetricFactory.create('euclidean', ds.layout(), {'descriptorNames': '*.mean'})
    view = View(ds)

    for query, parsed, constructed in queries:
        self.assertEqualSearchSpace(view.nnSearch(query, dist, parsed),
                                    view.nnSearch(query, dist, constructed))
def testValidPoint(dataset, clause, fromList=None):
    '''Return whether `clause` holds, by using it as the filter of a search.

    The sample point of `dataset` is searched for with a 'null' distance and
    the filter "WHERE <clause>" (prefixed by "FROM <fromList>" when a
    `fromList` is given), asking for a single result: getting exactly one
    result means the clause was true, anything else means it was false.
    '''
    fromPart = ('FROM ' + fromList + ' ') if fromList else ''
    filtr = fromPart + 'WHERE ' + clause

    view = View(dataset)
    nullDist = MetricFactory.create('null', dataset.layout())
    found = view.nnSearch(dataset.samplePoint(), nullDist, filtr).get(1)

    return len(found) == 1
def testWeightedPearson(self):
    '''Check the 'WeightedPearson' distance between two hand-filled points.'''
    ds = testdata.createSimpleDataSet()
    # the descriptor needs to hold 2 values before its length gets fixed
    ds.point('p')['a.1'] = [ 0, 0 ]

    first = transform(ds, 'fixlength').point('p')
    second = Point(first)

    weights = { '1': 0.3, 'c': 0.7 }
    dist = MetricFactory.create('WeightedPearson', first.layout(), { 'weights': weights })

    first['a.1'] = [ 0.12, 2.71 ]
    first['c'] = 4.32

    # NOTE(review): '1' is presumably a short form of 'a.1' -- to be confirmed
    second['1'] = [ 0.46, 1.12 ]
    second['c'] = 2.4242

    self.assertAlmostEqual(dist(first, second), 0.038222129799, 6)
def testDeleteUnderlyingDataSet(self):
    '''Check that a View can still be searched once the local name of its dataset is deleted.'''
    params = {'descriptorNames': ['*.mean', '*.var']}

    ds = testdata.loadTestDB()
    for transfoName in ('fixlength', 'cleaner', 'normalize'):
        ds = transform(ds, transfoName, params)

    dist = MetricFactory.create('euclidean', ds.layout(), params)
    view = View(ds)

    del ds

    # This search used to be expected to raise an exception; it doesn't throw
    # anymore, as the View keeps a ref to the dataset.
    view.nnSearch('01 Respect.mp3', dist)
def testRhythmDistance(self):
    '''Check the 'Rhythm' distance on descriptor 'a.1' for a few pairs of values.'''
    first = testdata.createSimpleDataSet().point('p')
    second = Point(first)

    rhythmParams = { 'descriptorName': 'a.1',
                     'indexRange': [ 1, 2, 4, 8 ],
                     'alpha': 0.8 }
    dist = MetricFactory.create('Rhythm', first.layout(), rhythmParams)

    first['a.1'] = 3
    second['a.1'] = 2

    self.assertAlmostEqual(dist(first, first), 0.0)
    # same value whatever the order of the arguments
    self.assertAlmostEqual(dist(first, second), 0.4)
    self.assertAlmostEqual(dist(second, first), 0.4)

    # change the value of the first point only and measure again
    first['a.1'] = 3.14
    self.assertAlmostEqual(dist(first, second), 0.344)

    first['a.1'] = 6.23
    self.assertAlmostEqual(dist(first, second), 0.45312)
def testSubspaceSearch(self):
    '''Check that filtering in two steps (search inside a previous result) is
    equivalent to filtering with both conditions at once.'''
    ds = transform(testdata.loadTestDB(), 'fixlength')
    dist = MetricFactory.create('euclidean', ds.layout(), {'descriptorNames': '*.mean'})
    view = View(ds)

    pid = 'Higher State of Consciousness.mp3'
    isKeyA = 'WHERE label.key_key = "A"'
    isMinor = 'WHERE label.key_mode = "minor"'

    # reference: both conditions in a single filter
    key_a_minor = view.nnSearch(
        pid, dist, 'WHERE label.key_key = "A" AND label.key_mode = "minor"')

    # one condition at a time...
    key_a = view.nnSearch(pid, dist, isKeyA)
    key_minor = view.nnSearch(pid, dist, isMinor)

    # ...then the other condition, searching inside the previous result
    key_a_minor_sspace1 = view.nnSearch(pid, key_minor, dist, isKeyA)
    key_a_minor_sspace2 = view.nnSearch(pid, key_a, dist, isMinor)

    self.assertEqualSearchSpace(key_a_minor, key_a_minor_sspace1)
    self.assertEqualSearchSpace(key_a_minor, key_a_minor_sspace2)
def testCreatedInputSpace(self):
    '''Check that a search space obtained by removing points from a result and
    an InputSpace created by hand with the same points behave the same way,
    then check the thresholdLimit method.'''
    ds = testdata.createSimpleDataSet()
    ds.point('p')['a.1'] = 23.0

    # add points p0..p4, with a.1 = 0..4
    for i in range(5):
        newPoint = Point()
        newPoint.setName('p%d' % i)
        newPoint.setLayout(ds.originalLayout())
        newPoint['a.1'] = float(i)
        ds.addPoint(newPoint)

    ds = transform(ds, 'fixlength')
    dist = MetricFactory.create('euclidean', ds.layout())
    v = View(ds)
    p = ds.point('p')

    # first search space: a complete search result minus p2 and p4
    RS_remove = v.nnSearch(p, dist)
    RS_remove.removePoints(['p2', 'p4'])

    # second search space: created empty, then filled with the remaining points
    RS_create = InputSpace()
    RS_create.addPoints(ds, ['p', 'p0', 'p1', 'p3'])

    rsc = v.nnSearch(p, RS_remove, dist)
    rsa = v.nnSearch(p, RS_create, dist)

    expected = (('p', 0.), ('p3', 20.), ('p1', 22.), ('p0', 23.))
    for space in (rsc, rsa):
        self.assertEqual(expected, v.nnSearch(p, space, dist).get(10))

    # test thresholdLimit method: (limit, number of expected results kept)
    for limit, nkept in ((10, 1), (20, 2), (22.01, 3)):
        self.assertEqual(expected[:nkept],
                         v.nnSearch(p, rsa, dist).thresholdLimit(limit).get(10))
def testComplete(self):
    '''Backwards compatibility check against a dataset and history from gaia 2.0.

    At the moment the test only asserts that loading the 2.0 history raises
    an exception, and returns right after that.
    '''
    # load 2.0 dataset and history
    dataset = DataSet()
    dataset.load(testdata.GAIA_20_BACKWARDS_COMPAT_DATASET)

    history = TransfoChain()
    self.assertRaises(Exception, history.load, testdata.GAIA_20_BACKWARDS_COMPAT_HISTORY)

    return

    # NOTE(review): everything below is unreachable because of the return
    # above. It applies the history to the dataset and checks that the
    # nn-search results are the same as the ones obtained from gaia 2.0.
    history.load(testdata.GAIA_20_BACKWARDS_COMPAT_HISTORY)
    dataset = history.mapDataSet(dataset)

    view = View(dataset)
    dist = MetricFactory.create('euclidean', dataset.layout())
    results = view.nnSearch('01 Respect.mp3', dist).get(5)
    self.compareResults(results, testdata.GAIA_20_BACKWARDS_COMPAT_RESULTS)

    ds21 = DataSet()
    ds21.load(testdata.TEST_DATABASE)
    mappedQuery = history.mapPoint(ds21.point('01 Respect.mp3'))
    results = view.nnSearch(mappedQuery, dist).get(5)
    self.compareResults(results, testdata.GAIA_20_BACKWARDS_COMPAT_RESULTS)
def testSimple(self):
    '''Search the neighbours of 'p' among a few modified copies of itself.'''
    ds = testdata.createSimpleDataSet()

    # (name of the copy, descriptor values changed in that copy)
    copies = (
        ('p2', ()),
        ('p3', (('a.1', 1),)),
        ('p4', (('a.1', 1), ('a.2', 1))),
    )
    newPoints = []
    for name, changes in copies:
        pt = Point(ds.point('p'))
        pt.setName(name)
        for descriptor, value in changes:
            pt[descriptor] = value
        newPoints.append(pt)

    # only add the copies once all of them have been created
    for pt in newPoints:
        ds.addPoint(pt)

    ds = transform(ds, 'fixlength')
    dist = MetricFactory.create('euclidean', ds.layout())
    results = View(ds).nnSearch('p', dist).get(10)

    # the first two results are at distance 0 (only the distance is checked)
    for rank in (0, 1):
        self.assertEqual(results[rank][1], 0.0)

    self.assertSearchResultEqual(results[2], ('p3', 1.0))
    self.assertSearchResultEqual(results[3], ('p4', math.sqrt(2)))
def search(ds, p):
    '''Return the 5 nearest neighbours of `p` in `ds`, using the euclidean distance.

    The point is first mapped through the history of the dataset.
    '''
    mapped = ds.history().mapPoint(p)
    euclidean = MetricFactory.create('euclidean', ds.layout())
    view = View(ds)
    return view.nnSearch(mapped, euclidean).get(5)
def search(dataset, id, n):
    '''Return the `n` first results of a euclidean nn-search for `id` in `dataset`.'''
    euclidean = MetricFactory.create('euclidean', dataset.layout())
    results = View(dataset).nnSearch(id, euclidean)
    return results.get(n)
def testRegressionIndexing(self):
    '''Check that indexed views select the same search space as plain ones.

    Each query is run on a view without index and on a view indexed on
    'tempotap_bpm', 'key_key' and 'key_mode'; this is done for standard
    views first, then for frozen views.
    '''
    ds = testdata.loadTestDB()
    for transfoName in ('removevl', 'fixlength'):
        ds = transform(ds, transfoName)

    p = ds.samplePoint().name()
    p2 = 'Higher State of Consciousness.mp3'

    # (query point, filter)
    queries = [
        (p, ''),
        (p2, ''),
        (p, 'WHERE (True AND True) and (true and TRUE)'),
        (p, 'WHERE (false AND True) OR (true and false)'),
        (p2, 'WHERE value.tempotap_bpm.value > 140'),
        (p, 'WHERE true AND value.tempotap_bpm.value > 140'),
        (p, 'WHERE value.tempotap_bpm > 110'),
        (p, 'WHERE value.tempotap_bpm > -10'),
        (p, 'WHERE value.tempotap_bpm < -10'),
        (p, 'WHERE value.tempotap_bpm > 23000'),
        (p, 'WHERE value.tempotap_bpm < 23000'),
        (p, 'WHERE value.tempotap_bpm > 120 AND value.tempotap_bpm < 130'),
        (p, 'WHERE value.tempotap_bpm BETWEEN 120 AND 130'),
        (p, 'WHERE value.tempotap_bpm BETWEEN 130 AND 120'),
        (p, 'WHERE value.tempotap_bpm BETWEEN 120 AND 120'),
        (p, 'WHERE value.tempotap_bpm BETWEEN -2.3 AND 4096'),
        (p, "WHERE value.tempotap_bpm BETWEEN -2.3 AND -1.4"),
        (p, "WHERE value.tempotap_bpm BETWEEN 2048 AND 4096"),
        (p, 'WHERE label.key_key = "C"'),
        (p, 'WHERE label.key_key != "C"'),
        (p, 'WHERE label.key_key = "X"'),
        (p, 'WHERE label.key_key != "X"'),
        (p, 'WHERE label.key_key != "C" AND label.key_mode != "major"'),
        (p2, '''WHERE ((label.key_key = "A" AND label.key_mode = "major") OR (label.key_key = "E" AND label.key_mode = "minor")) AND value.tempotap_bpm < 90'''),
    ]

    def compareWithIndexed(plain, indexed, dist):
        # index the second view, then run all the queries on both views
        for descriptor in ('tempotap_bpm', 'key_key', 'key_mode'):
            indexed.indexOn(descriptor)
        for query, filt in queries:
            self.assertEqualSearchSpace(plain.nnSearch(query, dist, filt),
                                        indexed.nnSearch(query, dist, filt))

    # test with standard views
    dist = MetricFactory.create('euclidean', ds.layout(), {'descriptorNames': '*.mean'})
    compareWithIndexed(View(ds), View(ds), dist)

    # test with frozen views: the frozen dataset is made from a PCA of the
    # normalized real descriptors, and keeps `ds` as its reference dataset
    realDescriptors = ds.layout().descriptorNames(RealType)
    dsr = transform(ds, 'select', {'descriptorNames': realDescriptors})
    dsnorm = transform(dsr, 'normalize')
    dspca = transform(dsnorm, 'pca', { 'resultName': 'pca', 'dimension': 25 })

    fds = FrozenDataSet()
    fds.fromDataSet(dspca)
    fds.setReferenceDataSet(ds)

    frozenDist = FrozenDistanceFactory.create('Euclidean', fds.layout(), {'descriptorName': 'pca'})
    compareWithIndexed(FrozenView(fds), FrozenView(fds), frozenDist)