Example #1
import gaia2

def dataset_to_csv(filename, csv_filename):
    # Load the dataset from disk
    ds = gaia2.DataSet()
    ds.load(filename)

    out = open(csv_filename, 'w')

    # real-valued and string-valued descriptor names from the dataset layout
    valueNames = ds.layout().descriptorNames(gaia2.RealType)
    labelNames = ds.layout().descriptorNames(gaia2.StringType)

    # header row: track name followed by every descriptor column
    out.write('Track name;')
    for name in labelNames:
        out.write('%s;' % name)
    for name in valueNames:
        out.write('%s;' % name)
    out.write('\n')

    # one CSV row per point, across all collections
    for cname in ds.collectionNames():
        collection = ds.collection(cname)
        for pname in collection.pointNames():
            p = collection.point(pname)

            out.write('%s;' % pname)

            for name in labelNames:
                out.write('%s;' % p.label(name))

            for name in valueNames:
                out.write('%s;' % str(p.value(name)))

            out.write('\n')

    out.close()
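
A minimal usage sketch (both filenames are hypothetical placeholders, assuming a Gaia dataset saved on disk):

dataset_to_csv('features.db', 'features.csv')

# quick sanity check: one header row plus one row per point
with open('features.csv') as f:
    print(sum(1 for _ in f), 'lines written')
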
Example #2
    def __call__(self, *args, **kwargs):
        # pre-check for errors that can happen very often and where a single
        # good error message is really helpful
        if (self.methodName.startswith('nnSearch') and self.methodName
                not in ('nnSearchById', 'nnSearchByIdWithFilter',
                        'nnSearchByExample', 'nnSearchByExampleWithFilter')):
            raise AttributeError(
                'You need to use either nnSearchById{WithFilter} or nnSearchByExample{WithFilter}'
            )

        # pre-processing for certain specific methods
        if self.methodName.startswith('nnSearchByExample'):
            args = (args[0].toBase64(), ) + args[1:]

        # in the case of an nnSearch request, we shouldn't run the query
        # immediately but rather return a proxy object that allows chaining
        # queries using the search_space argument. The actual query is only
        # resolved when the user calls the get() method on this proxy object.
        if self.methodName.startswith('nnSearch'):
            return ResultSet(self.endPoint, self.methodName, args, kwargs)

        # actual processing by the server
        result = YamlRPCMethod.__call__(self, *args, **kwargs)

        # post-processing for certain specific methods
        if self.methodName == 'layout':
            result = yaml.safe_load(result)  # safe_load suffices for the plain-YAML layout

        elif self.methodName == 'getPoint':
            try:
                import gaia2
            except ImportError:
                raise ImportError(
                    'You need to have the gaia2 python module installed in order to be able to retrieve single points'
                )
            p = gaia2.Point()
            p.fromBase64(result)
            result = p

        elif self.methodName == 'getPoints':
            try:
                import gaia2
            except ImportError:
                raise ImportError(
                    'You need to have the gaia2 python module installed in order to be able to retrieve points'
                )
            ds = gaia2.DataSet()
            ds.fromBase64(result)
            result = ds

        return result
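
From the caller's side, the proxy described in the comments above might be used like this (the endpoint object and point ids are hypothetical; only the existence of get() and the search_space chaining are taken from the code):

# nnSearchById returns a ResultSet proxy immediately; no request is sent yet
candidates = endpoint.nnSearchById('point_id_1')

# a further search can be restricted to the first result set via search_space
refined = endpoint.nnSearchById('point_id_2', search_space=candidates)

# only this call resolves the chained query on the server
results = refined.get()
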
Example #3
    def select(self, dbfile, pca_covered_variance=75, highlevel=True):
        '''
        dbfile: the path to the gaia dataset
        pca_covered_variance: the PCA transformation should keep at least this variance
        highlevel: include high-level descriptors
        '''
        if not os.path.exists("transformed_dbs"):
            os.mkdir("transformed_dbs")
        # derive the output name from the input path (strip directory and extension)
        prefix = dbfile[dbfile.rfind("/") + 1:dbfile.rfind(".")]
        print(dbfile)
        ds = gaia2.DataSet()
        ds.load(dbfile)
        # the 'cleaner' analyzer drops problematic descriptors (e.g. constant
        # or undefined values) before any further transformation
        cleaner = gaia2.AnalyzerFactory.create('cleaner')
        cleanDB = cleaner.analyze(ds).applyToDataSet(ds)

        # always drop redundant statistics; drop the high-level descriptors
        # too unless they were requested
        if highlevel:
            to_remove = ['*.dmean2', '*.dvar2', '*.min', '*.max', '*cov']
        else:
            to_remove = [
                '.highlevel.*', '*.dmean2', '*.dvar2', '*.min', '*.max', '*cov'
            ]

        fselectDB = gaia2.transform(cleanDB, 'remove',
                                    {'descriptorNames': to_remove})

        # NORMALIZE, PCA & Friends
        normalize = gaia2.AnalyzerFactory.create('normalize')
        normalizedDB = normalize.analyze(fselectDB).applyToDataSet(fselectDB)

        pcavar = gaia2.AnalyzerFactory.create(
            'pca', {
                'coveredVariance': pca_covered_variance,
                'resultName': 'pca%dtransform' % pca_covered_variance
            })
        pcaDB = pcavar.analyze(normalizedDB).applyToDataSet(normalizedDB)

        mfccDB = gaia2.transform(
            cleanDB, 'select', {
                'descriptorNames': [
                    '*mfcc*', '.highlevel.*', '.rhythm.bpm',
                    '.rhythm.onset_rate'
                ]
            })

        # merge the hand-picked descriptors with the PCA-reduced ones
        finalDB = gaia2.mergeDataSets(mfccDB, pcaDB)
        outfile = "transformed_dbs/" + prefix + ".db"
        finalDB.save(outfile)
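
A usage sketch (the owner class and the input path are hypothetical; the output location follows from the code above):

selector = FeatureSelector()  # hypothetical class that defines select()
selector.select('datasets/features.db', pca_covered_variance=75)
# -> writes the transformed dataset to transformed_dbs/features.db
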
Example #4

import pylab as pl

OUT_FOLDER = 'histograms'  # output directory for the plots (name assumed)

def plot_histogram(pool, label, n_bins, range_min, range_max, x_label_ticks=None):
    # NOTE: the start of this example was truncated; the import, signature and
    # figure setup are a plausible reconstruction from the code that follows.
    fig = pl.figure()
    ax = fig.add_subplot(111)
    if x_label_ticks:
        n_bins = len(x_label_ticks)
    n, bins, patches = ax.hist(pool, bins=n_bins,
                               range=(float(range_min), float(range_max)),
                               log=False, histtype='stepfilled')
    pl.title('Distribution: %s' % label)
    if not x_label_ticks:
        ax.ticklabel_format(axis='x', style='sci', scilimits=(-3,3))
    else:
        pl.xticks(range(0, len(x_label_ticks)),
                  ['           %s' % tick for tick in x_label_ticks])
    ax.ticklabel_format(axis='y', style='sci', scilimits=(-2,2))
    ax.set_xlabel('Value')
    ax.set_ylabel('Frequency of occurrence')
    ax.grid(True)
    # descriptor names start with '.', strip it for the output filename
    pl.savefig('%s/%s.png' % (OUT_FOLDER, label[1:]))
    pl.close()

# GAIA_INDEX_FILE is the path to the dataset under analysis (defined elsewhere
# in the original script)
ds = gaia2.DataSet()
dataset_path = GAIA_INDEX_FILE
ds.load(dataset_path)
# walk the transformation history backwards to recover the normalization
# coefficients that were applied to the dataset
transformation_history = ds.history().toPython()
normalization_coeffs = None
for step in reversed(transformation_history):
    if step['Analyzer name'] == 'normalize':
        normalization_coeffs = step['Applier parameters']['coeffs']
descriptor_names = ds.layout().descriptorNames()
point_names = ds.pointNames()
example_point = ds.point(point_names[0])
# statistic suffixes computed for each real descriptor
stats = ['dmean', 'dmean2', 'dvar', 'dvar2', 'max', 'mean', 'min', 'var']


# plot one histogram per descriptor
for descriptor_name in descriptor_names:
    # the loop body was lost when the example was truncated; it presumably
    # pooled each descriptor's per-point values and called plot_histogram()
    pass
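
The recovered coefficients can be used to map normalized values back to their original range. A minimal sketch, assuming the 'normalize' step applies x' = a*x + b per dimension and that coeffs maps each descriptor name to its 'a' and 'b' lists (this layout is an assumption, not shown in the snippet):

def unnormalize(value, descriptor_name, coeffs):
    # invert x' = a*x + b for the first dimension of a descriptor (assumed layout)
    a = coeffs[descriptor_name]['a'][0]
    b = coeffs[descriptor_name]['b'][0]
    return (value - b) / a
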
Example #5

    def __load_dataset(self, dataset_file):
        print("Loading training dataset....")
        self.ds = gaia2.DataSet()
        self.ds.load(dataset_file)