def createDataSet():
    """Build a small two-point dataset for the tests.

    Both points share one layout holding a single RealType descriptor 'a':
    'p1' gets a = (0.0, 0.0) and 'p2' gets a = (0.5, 1.0).

    The dataset is passed through testdata.fixLength and/or
    testdata.enumerateStrings when the corresponding testdata flag
    (useFixedLength / useEnumerate) is set.
    """
    layout = PointLayout()
    layout.add('a', RealType)

    dataset = DataSet()
    for name, value in (('p1', (0.0, 0.0)), ('p2', (0.5, 1.0))):
        point = Point()
        point.setName(name)
        point.setLayout(layout)
        point['a'] = value
        dataset.addPoint(point)

    if testdata.useFixedLength:
        dataset = testdata.fixLength(dataset)
    if testdata.useEnumerate:
        dataset = testdata.enumerateStrings(dataset)
    return dataset
def newPoint(name):
    """Return a new Point called *name* with a one-descriptor layout.

    The layout is created fresh on every call and contains a single
    RealType descriptor 'a'. No value is assigned to it here.
    """
    layout = PointLayout()
    layout.add('a', RealType)

    point = Point()
    point.setName(name)
    point.setLayout(layout)
    return point
def createSimpleLayout():
    """Return a PointLayout populated with a fixed set of descriptors.

    Descriptors are added in this order: 'a.1', 'a.2', 'c', 'b' (RealType),
    'd' (StringType) and 'e' (RealType, FixedLength, 3).
    """
    # Each entry is the exact argument tuple handed to PointLayout.add.
    descriptors = (
        ('a.1', RealType),
        ('a.2', RealType),
        ('c', RealType),
        ('b', RealType),
        ('d', StringType),
        ('e', RealType, FixedLength, 3),
    )

    layout = PointLayout()
    for add_args in descriptors:
        layout.add(*add_args)
    return layout
def testChronoIndependence(self):
    # Layouts built using the same descriptors but in a different order
    # should still be equal.
    # (Kept as a comment rather than a docstring so the test's short
    # description shown by the runner does not change.)
    l = PointLayout()
    l.add('a', RealType)
    l.add('b', RealType)

    l2 = PointLayout()
    l2.add('b', RealType)
    l2.add('a', RealType)

    # assertTrue replaces the deprecated assert_ alias, which was removed
    # from unittest in Python 3.12.
    self.assertTrue(l == l2)
def testForceIdentity(self):
    # Checks the 'forceidentity' metric wrapper: it must return 0.0 when a
    # point is compared with itself, and otherwise give the value of the
    # wrapped distance.
    l = PointLayout()
    l.add('a', RealType, FixedLength, 1)

    p = Point()
    p.setLayout(l)

    # Plain cosine similarity yields its configured defaultValue here, even
    # for a point compared against itself.
    cd = MetricFactory.create('cosinesimilarity', p.layout(),
                              {'defaultValue': 0.5})
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(cd(p, p), 0.5)

    ficd = MetricFactory.create('forceidentity', p.layout(),
                                {'distance': 'cosinesimilarity',
                                 'params': {'defaultValue': 0.5}})
    self.assertEqual(ficd(p, p), 0.0)

    # A copy under a different name is expected to get the wrapped
    # distance's value rather than the forced 0.0.
    p2 = Point(p)
    p2.setName('p2')
    self.assertEqual(ficd(p, p2), 0.5)
def testComplexReferenceCounting(self):
    # Follows the layout reference count while layouts are shared between
    # points, explicit PointLayout wrappers and a dataset.
    #
    # NOTE: every expected count depends on exactly which objects are alive
    # at that moment, so the statements are kept one per line and no extra
    # temporaries are introduced.
    dataset = DataSet()
    # 1 for the dataset + 1 for the temporary returned by layout()
    self.assertEqual(dataset.layout().ref(), 2)

    pt = Point()
    pt.setName('p1')

    # Wrapping the point's layout shares it: wrapper and point count as 2.
    layout_ext = PointLayout(pt.layout())
    self.assertEqual(layout_ext.ref(), 2)

    # Rebinding to a copy: the copy is referenced once, and the point's own
    # layout loses the reference held by the previous wrapper.
    layout_ext = pt.layout().copy()
    self.assertEqual(layout_ext.ref(), 1)

    # Adding the point is expected to bring the shared layout to 3
    # references (dataset + the dataset's copy of the point); the detached
    # copy is unaffected.
    dataset.addPoint(pt)
    self.assertEqual(layout_ext.ref(), 1)
    # 3 + 1 for the temporary returned by layout()
    self.assertEqual(dataset.layout().ref(), 4)

    # Copy-constructing a point adds one more reference.
    pt_copy = Point(pt)
    pt_copy.setName('p2')
    self.assertEqual(dataset.layout().ref(), 5)

    # ...and so does the copy the dataset makes when the point is added.
    dataset.addPoint(pt_copy)
    self.assertEqual(dataset.layout().ref(), 6)
def PCA(x):
    """Run gaia's 'pca' transform over the rows of *x*.

    Each row of *x* is stored as the 'x' descriptor of a point named
    'p<index>'. The resulting dataset goes through the 'fixlength'
    transform and then 'pca', configured with dimension len(x[0]) and
    resultName 'pca'.

    Returns a list with the 'pca' descriptor of every point of the
    transformed dataset, in the order given by its points() method.
    """
    layout = PointLayout()
    layout.add('x', RealType)

    points = []
    for index, row in enumerate(x):
        point = Point()
        point.setName('p%d' % index)
        point.setLayout(layout)
        point['x'] = row
        points.append(point)

    dataset = DataSet()
    dataset.addPoints(points)
    dataset = transform(dataset, 'fixlength')

    pca_params = {'dimension': len(x[0]), 'resultName': 'pca'}
    dataset = transform(dataset, 'pca', pca_params)

    return [point['pca'] for point in dataset.points()]
def PCA(x):
    """Return the 'pca' descriptors obtained from the rows of *x*.

    One point per row is created (named 'p0', 'p1', ...), holding the row
    in its 'x' descriptor. The points are gathered in a dataset that is
    transformed with 'fixlength' and then with 'pca' (dimension len(x[0]),
    resultName 'pca'). The 'pca' value of each transformed point is
    collected into the returned list.
    """
    shared_layout = PointLayout()
    shared_layout.add('x', RealType)

    def build_point(position, values):
        # One point per input row, all sharing the same layout.
        pt = Point()
        pt.setName('p%d' % position)
        pt.setLayout(shared_layout)
        pt['x'] = values
        return pt

    input_points = [build_point(i, row) for i, row in enumerate(x)]

    ds = DataSet()
    ds.addPoints(input_points)
    ds = transform(ds, 'fixlength')
    ds = transform(ds, 'pca', {'dimension': len(x[0]),
                               'resultName': 'pca'})

    projected = []
    for pt in ds.points():
        projected.append(pt['pca'])
    return projected
def testMerge(self):
    # Exercises mergeLayouts against the fixture layout self.l1.
    same_layout = testdata.createSimpleLayout()
    self.assertEqual(self.l1, same_layout)

    # Merging two equal layouts is expected to raise.
    self.assertRaises(Exception, mergeLayouts, self.l1, same_layout)

    # A second layout that declares 'a' and ('a', '3') as RealType.
    # NOTE(review): presumably the two-name form adds a child '3' under
    # 'a' -- confirm against PointLayout.add.
    extra_layout = PointLayout()
    extra_layout.add('a', RealType)
    extra_layout.add('a', '3', RealType)

    merged = mergeLayouts(same_layout, extra_layout)

    # The merge should contribute exactly one new descriptor name...
    expected_count = len(same_layout.descriptorNames()) + 1
    self.assertEqual(len(merged.descriptorNames()), expected_count)

    # ...and 'a' should then span 3 variable-length real descriptors.
    a_location = merged.descriptorLocation('a')
    self.assertEqual(a_location.size(RealType, VariableLength), 3)
def readLibSVMDataSet(filename):
    """Load a LibSVM-formatted text file into a DataSet.

    Each non-blank line is expected to look like
    ``<class> <index>:<value> <index>:<value> ...``. Every line becomes one
    point named 'instance_NNNNNN' (zero-padded position among the parsed
    lines) with two descriptors:

    - 'class' (StringType): the first token of the line;
    - 'value' (RealType): a dense vector covering the index range found in
      the whole file, 0.0 where the line gives no value.

    The index range starts at min(1, smallest index seen) and ends at
    max(1, largest index seen).

    Raises ValueError when a feature token is not of the form
    ``<int>:<float>``, and whatever open() raises when the file cannot be
    read.
    """
    rows = []
    minidx = maxidx = 1

    # The context manager guarantees the file handle is closed, including
    # when a malformed line makes the parsing raise.
    with open(filename) as svmfile:
        for line in svmfile:
            tokens = line.split()
            if not tokens:
                # Blank line (e.g. a trailing newline): nothing to parse.
                continue

            features = []
            for token in tokens[1:]:
                dim, value = token.split(':')
                dim = int(dim)
                features.append((dim, float(value)))
                minidx = min(minidx, dim)
                maxidx = max(maxidx, dim)
            rows.append((tokens[0], features))

    dimension = maxidx - minidx + 1

    layout = PointLayout()
    layout.add('class', StringType)
    layout.add('value', RealType)

    ds = DataSet()
    points = []
    for n, (label, features) in enumerate(rows):
        p = Point()
        p.setLayout(layout)
        p.setName('instance_%06d' % n)
        p['class'] = label

        # Dense vector; file indices are shifted so minidx maps to slot 0.
        desc = RealDescriptor(dimension, 0.0)
        for dim, value in features:
            desc[dim - minidx] = value
        p['value'] = desc

        points.append(p)

    ds.addPoints(points)
    return ds
def testBasicReferenceCounting(self):
    # Checks how the layout's ref() count reacts to Python-level aliases,
    # to PointLayout(other) wrappers and to copy().
    #
    # NOTE: the assertions are intentionally left unrolled and no helper
    # variables are introduced -- any extra name bound to one of these
    # objects would keep it alive past its `del` and change the counts.
    original = PointLayout()
    self.assertEqual(original.ref(), 1)

    # A second Python name for the same object does not change the count.
    alias = original
    self.assertEqual(original.ref(), 1)
    self.assertEqual(alias.ref(), 1)

    # Constructing a PointLayout from another one shares it: count goes up.
    shared = PointLayout(alias)
    self.assertEqual(original.ref(), 2)
    self.assertEqual(alias.ref(), 2)
    self.assertEqual(shared.ref(), 2)

    # copy() yields a separate layout with a count of its own.
    detached = shared.copy()
    self.assertEqual(detached.ref(), 1)

    # Dropping the sharing wrapper releases its reference.
    del shared
    self.assertEqual(original.ref(), 1)
    self.assertEqual(alias.ref(), 1)

    # Share again...
    shared = PointLayout(alias)

    # ...then drop one of the two Python names: the object is still
    # reachable through `alias`, so the count stays at 2.
    del original
    self.assertEqual(alias.ref(), 2)
    self.assertEqual(shared.ref(), 2)

    # Dropping the last Python name releases the underlying reference too.
    del alias
    self.assertEqual(shared.ref(), 1)
def testCorrectNodes(self):
    # Checks which PointLayout.add calls are accepted and how they affect
    # the list of descriptor names.
    layout = PointLayout()

    # Adding a name without a type must be rejected.
    self.assertRaises(Exception, layout.add, 'undef')

    layout.add('blah', RealType)
    # Adding 'blah' again through the ('', 'blah') form with another type
    # must leave the layout as it was: one descriptor, still RealType.
    layout.add('', 'blah', StringType)
    self.assertEqual(len(layout.descriptorNames()), 1)
    blah_location = layout.descriptorLocation('blah')
    self.assertEqual(blah_location.type(), RealType)

    layout.add('blouh', StringType)
    self.assertEqual(len(layout.descriptorNames()), 2)

    # An UndefinedType add must raise and leave the layout untouched.
    self.assertRaises(Exception, layout.add, 'blouh', 'bluh', UndefinedType)
    self.assertEqual(len(layout.descriptorNames()), 2)
def testLibyaml1024CharLimit(self):
    # A layout whose descriptor name is 2000 characters long must still
    # serialize to YAML that fastyaml can load back (the test passes as long
    # as nothing raises).
    from gaia2 import fastyaml as yaml

    long_name = 'a' * 2000
    layout = PointLayout()
    layout.add(long_name, RealType)

    serialized = layout.toYaml()
    yaml.load(serialized)
def testIntersect(self):
    # Checks the & operator between layouts.
    first = PointLayout()
    for name in ('a', 'b'):
        first.add(name, RealType)

    second = PointLayout()
    second.add('a', RealType)
    second.add('b', StringType)
    second.add('d', RealType)

    # 'b' has a different type on each side and 'd' exists on one side
    # only, so '.a' is the only descriptor expected in the intersection.
    common = first & second
    self.assertEqual(common.descriptorNames(), ('.a',))

    # Intersecting a layout with itself must keep all of its descriptors.
    for layout in (first, second):
        self.assertEqual(layout.descriptorNames(),
                         (layout & layout).descriptorNames())