import gaia2


def dataset_to_csv(filename, csv_filename):
    # dump every point of a Gaia dataset into a semicolon-separated CSV file
    ds = gaia2.DataSet()
    ds.load(filename)
    out = open(csv_filename, 'w')

    valueNames = ds.layout().descriptorNames(gaia2.RealType)
    labelNames = ds.layout().descriptorNames(gaia2.StringType)

    # header row: point name, then all string descriptors, then all real descriptors
    out.write('Track name;')
    for name in labelNames:
        out.write('%s;' % name)
    for name in valueNames:
        out.write('%s;' % name)
    out.write('\n')

    # one row per point, iterating over every collection in the dataset
    for cname in ds.collectionNames():
        for pname in ds.collection(cname).pointNames():
            p = ds.collection(cname).point(pname)
            out.write('%s;' % pname)
            for name in labelNames:
                out.write('%s;' % p.label(name))
            for name in valueNames:
                out.write('%s;' % str(p.value(name)))
            out.write('\n')

    out.close()
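
# Minimal usage sketch for dataset_to_csv(); the file names below are
# hypothetical placeholders, not paths referenced elsewhere in this code.
if __name__ == '__main__':
    dataset_to_csv('my_dataset.db', 'my_dataset.csv')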


def __call__(self, *args, **kwargs):
    # pre-check for errors that might happen very often and where one good
    # error message would be really nice to have
    if (self.methodName.startswith('nnSearch') and
            self.methodName not in ('nnSearchById', 'nnSearchByIdWithFilter',
                                    'nnSearchByExample', 'nnSearchByExampleWithFilter')):
        raise AttributeError('You need to use either nnSearchById{WithFilter} '
                             'or nnSearchByExample{WithFilter}')

    # pre-processing for certain specific methods
    if self.methodName.startswith('nnSearchByExample'):
        args = (args[0].toBase64(),) + args[1:]

    # in the case of an nnSearch request, we shouldn't do the query immediately but
    # rather return a proxy object that allows chaining queries using the search_space
    # argument. The actual query should only be resolved when the user calls the get()
    # method on this proxy object.
    if self.methodName.startswith('nnSearch'):
        return ResultSet(self.endPoint, self.methodName, args, kwargs)

    # actual processing by the server
    result = YamlRPCMethod.__call__(self, *args, **kwargs)

    # post-processing for certain specific methods
    if self.methodName == 'layout':
        result = yaml.load(result)

    elif self.methodName == 'getPoint':
        try:
            import gaia2
        except ImportError:
            raise ImportError('You need to have the gaia2 python module installed '
                              'in order to be able to retrieve single points')
        p = gaia2.Point()
        p.fromBase64(result)
        result = p

    elif self.methodName == 'getPoints':
        try:
            import gaia2
        except ImportError:
            raise ImportError('You need to have the gaia2 python module installed '
                              'in order to be able to retrieve points')
        ds = gaia2.DataSet()
        ds.fromBase64(result)
        result = ds

    return result
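
# Hypothetical usage sketch of the lazy nnSearch behaviour above, assuming a
# client object that exposes these RPC methods as attributes; the point ids,
# the search_space keyword placement and the argument to get() are illustrative
# assumptions, not taken from the code above.
# rs = client.nnSearchById('track_01')                          # ResultSet proxy, no query sent yet
# chained = client.nnSearchById('track_02', search_space=rs)    # chain queries via search_space
# results = chained.get(10)                                     # the query is only resolved here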


def select(self, dbfile, pca_covered_variance=75, highlevel=True):
    '''
    dbfile: the path to the gaia dataset
    pca_covered_variance: the pca transformation should keep at least this variance
    highlevel: include high-level descriptors
    '''
    if not os.path.exists("transformed_dbs"):
        os.mkdir("transformed_dbs")
    prefix = dbfile[dbfile.rfind("/") + 1:dbfile.rfind(".")]
    print dbfile

    ds = gaia2.DataSet()
    ds.load(dbfile)

    # clean the dataset of problematic descriptors
    cleaner = gaia2.AnalyzerFactory.create('cleaner')
    cleanDB = cleaner.analyze(ds).applyToDataSet(ds)

    # drop unwanted statistics; without highlevel, also drop the highlevel namespace
    if highlevel:
        to_remove = ['*.dmean2', '*.dvar2', '*.min', '*.max', '*cov']
    else:
        to_remove = ['.highlevel.*', '*.dmean2', '*.dvar2', '*.min', '*.max', '*cov']
    fselectDB = gaia2.transform(cleanDB, 'remove', {'descriptorNames': to_remove})

    # NORMALIZE, PCA & friends
    normalize = gaia2.AnalyzerFactory.create('normalize')
    normalizedDB = normalize.analyze(fselectDB).applyToDataSet(fselectDB)

    pcavar = gaia2.AnalyzerFactory.create('pca',
                                          {'coveredVariance': pca_covered_variance,
                                           'resultName': 'pca%dtransform' % pca_covered_variance})
    pcaDB = pcavar.analyze(normalizedDB).applyToDataSet(normalizedDB)

    # keep MFCCs, high-level descriptors and a couple of rhythm descriptors untransformed
    mfccDB = gaia2.transform(cleanDB, 'select',
                             {'descriptorNames': ['*mfcc*', '.highlevel.*',
                                                  '.rhythm.bpm', '.rhythm.onset_rate']})

    # merge the untransformed selection with the PCA-transformed descriptors
    finalDB = gaia2.mergeDataSets(mfccDB, pcaDB)

    outfile = "transformed_dbs/" + prefix + ".db"
    finalDB.save(outfile)
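
# Minimal usage sketch for select() above, assuming it is a method of a small
# feature-selection helper class; the class name and dataset path below are
# hypothetical placeholders.
# selector = FeatureSelector()
# selector.select('datasets/my_collection.db', pca_covered_variance=60, highlevel=False)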


# plot the distribution of the current descriptor's values and save it as a PNG
if x_label_ticks:
    n_bins = len(x_label_ticks)
n, bins, patches = ax.hist(pool, bins=n_bins,
                           range=(float(range_min), float(range_max)),
                           log=False, histtype='stepfilled')
pl.title('Distribution: %s' % label)
if not x_label_ticks:
    ax.ticklabel_format(axis='x', style='sci', scilimits=(-3, 3))
else:
    pl.xticks(range(0, len(x_label_ticks)), [' %s' % tick for tick in x_label_ticks])
ax.ticklabel_format(axis='y', style='sci', scilimits=(-2, 2))
ax.set_xlabel('Value')
ax.set_ylabel('Frequency of occurrence')
ax.grid(True)
pl.savefig('%s/%s.png' % (OUT_FOLDER, label[1:]))
pl.close()

ds = gaia2.DataSet()
dataset_path = GAIA_INDEX_FILE
ds.load(dataset_path)

# scan the dataset's transformation history for a 'normalize' step and keep
# its applier coefficients
transformation_history = ds.history().toPython()
normalization_coeffs = None
for i in range(0, len(transformation_history)):
    if transformation_history[-(i + 1)]['Analyzer name'] == 'normalize':
        normalization_coeffs = transformation_history[-(i + 1)]['Applier parameters']['coeffs']

descriptor_names = ds.layout().descriptorNames()
point_names = ds.pointNames()
example_point = ds.point(point_names[0])
stats = ['dmean', 'dmean2', 'dvar', 'dvar2', 'max', 'mean', 'min', 'var']

for descriptor_name in descriptor_names:


def __load_dataset(self, dataset_file):
    print "Loading training dataset...."
    self.ds = gaia2.DataSet()
    self.ds.load(dataset_file)