Example #1
0
    def __init__(self, dirpath, groundTruth=None):
        """Load a collection rooted at *dirpath*.

        Reads the collection's YAML config file, builds the per-format map
        of absolute audio file paths, and loads the ground truth (if any).

        Args:
            dirpath: directory containing the collection.
            groundTruth: optional ground truth passed to loadGroundTruth().
        """
        # the collection is named after the last component of its directory
        self.name = split(dirpath)[-1]
        self._basepath = dirpath
        self._config = yaml.loadfile(self.configFilePath())

        # map: audio format name -> {id: absolute file path}
        self._files = {}
        # 'fmt' instead of 'type' so the builtin is not shadowed; no need
        # to materialize items() into a list just to iterate it
        for fmt, props in self._config['audioFormats'].items():
            self._files[fmt] = self.absolutePathFiles(fmt, props['filelist'])

        self.loadGroundTruth(groundTruth)
Example #2
0
    def relativePathFiles(self, audioFormat=None):
        """Return the {id: relative path} map for the given audio format.

        When *audioFormat* is None, falls back to the first format declared
        in the config, printing a warning if more than one was available.
        """
        if audioFormat is None:
            # Python 2 code: dict.keys() returns a plain list here, so
            # indexing with [0] is valid
            audioFormats = self._config["audioFormats"].keys()
            audioFormat = audioFormats[0]
            if len(audioFormats) > 1:
                print "WARNING: only taking audio format: %s out of: %s" % (audioFormat, audioFormats)

        # name of the filelist file, relative to the metadata directory
        filelist = self._config["audioFormats"][audioFormat]["filelist"]

        return yaml.loadfile(join(self._basepath, "metadata", filelist))
Example #3
0
    def __init__(self, dirpath, groundTruth=None):
        """Initialize the collection from *dirpath*.

        Loads the YAML config, resolves each configured audio format's
        filelist to absolute paths, then loads the ground truth.

        Args:
            dirpath: directory containing the collection.
            groundTruth: optional ground truth passed to loadGroundTruth().
        """
        # collection name is the last path component of its directory
        self.name = split(dirpath)[-1]
        self._basepath = dirpath
        self._config = yaml.loadfile(self.configFilePath())

        # one entry per audio format: {format: {id: absolute path}}
        self._files = {}
        # loop var renamed from 'type' to avoid shadowing the builtin
        for fmt, props in self._config["audioFormats"].items():
            self._files[fmt] = self.absolutePathFiles(fmt, props["filelist"])

        self.loadGroundTruth(groundTruth)
Example #4
0
    def relativePathFiles(self, audioFormat=None):
        """Load and return the filelist mapping for *audioFormat*.

        When no format is given, the first one declared in the config is
        used; a warning is printed if several formats were available.
        """
        formats_cfg = self._config['audioFormats']
        if audioFormat is None:
            available = list(formats_cfg.keys())
            audioFormat = available[0]
            if len(available) > 1:
                print('WARNING: only taking audio format: %s out of: %s' %
                      (audioFormat, available))

        # filelist file lives under the collection's metadata directory
        filelist = formats_cfg[audioFormat]['filelist']
        return yaml.loadfile(join(self._basepath, 'metadata', filelist))
Example #5
0
def trainSVMfolds(collections, folds):
    """Train and save one SVM model per fold for every collection.

    For each collection, reads the best SVM parameters from its .param
    file, loads the matching preprocessed dataset, and for every fold
    trains an SVM on the dataset minus the fold's points, saving the
    model to disk. Folds whose model file already exists are skipped.

    Args:
        collections: mapping of collection name -> collection object.
        folds: mapping of collection name -> list of folds (point names).

    Raises:
        Exception: if a .param file does not describe an SVM classifier.
    """
    gaia2.verbose = False

    # iterate (name, collection) pairs directly instead of keys + lookup
    for cname, collec in collections.items():
        print('Training models for all folds for collection', cname)
        # load best parameters found for this collection
        filename = glob.glob('test/evaldata/essentia_svm_models/%s*.param' %
                             CN(collec))[0]

        params = yaml.loadfile(filename)['model']
        if params.pop('classifier') != 'svm':
            raise Exception(
                'Can only use this script on SVM config parameters.')

        preproc = params.pop('preprocessing')

        # load original preprocessed dataset
        ds = DataSet()
        ds.load('%s/%s-harm-%s.db' % (WORK_DIR, CN(collec), preproc))

        # add 'highlevel.' in front of the descriptor, this is what will
        # appear in the final Essentia sigfile
        gt = collec.groundTruth.copy()
        gt.className = 'highlevel.' + CN(collec)

        # create and train datasets for all folds
        print('Training for model:', gt.className)
        for i, fold in enumerate(folds[cname]):
            print(' - fold', i)
            model_filename = '%s/%s_%d.model' % (WORK_DIR, CN(collec), i)
            if os.path.exists(model_filename):
                print('already computed')
                continue

            # hold the fold's points out of the training dataset
            dsf = ds.copy()
            toRemove = set(fold) & set(ds.pointNames())
            dsf.removePoints(list(toRemove))

            # train SVM on the remaining points
            h = trainSVM(dsf, gt, **params)

            h.save(model_filename)
Example #6
0
def trainSVMfolds(collections, folds):
    """Train and save one SVM model per fold for every collection.

    Reads the best SVM parameters from each collection's .param file,
    loads the corresponding preprocessed dataset, removes each fold's
    points to form the training set, trains an SVM on the remainder and
    saves the model. Folds whose model file already exists are skipped.

    Raises an Exception if a .param file is not an SVM configuration.
    """
    gaia2.verbose = False

    for cname in collections.keys():
        collec = collections[cname]

        print 'Training models for all folds for collection', cname
        # load best parameters
        filename = glob.glob('test/evaldata/essentia_svm_models/%s*.param' % CN(collec))[0]

        params = yaml.loadfile(filename)['model']
        if params.pop('classifier') != 'svm':
            raise Exception('Can only use this script on SVM config parameters.')

        preproc = params.pop('preprocessing')

        # load original preprocessed dataset
        ds = DataSet()
        ds.load('%s/%s-harm-%s.db' % (WORK_DIR, CN(collec), preproc))

        # add 'highlevel.' in front of the descriptor, this is what will appear in the final Essentia sigfile
        gt = collec.groundTruth.copy()
        gt.className = 'highlevel.' + CN(collec)

        # create and train datasets for all folds
        print 'Training for model:', gt.className
        for i, fold in enumerate(folds[cname]):
            print ' - fold', i
            model_filename = '%s/%s_%d.model' % (WORK_DIR, CN(collec), i)
            if os.path.exists(model_filename):
                print 'already computed'
                continue

            # remove points from the fold to have a training dataset
            dsf = ds.copy()
            toRemove = set(fold) & set(ds.pointNames())
            dsf.removePoints(list(toRemove))

            # train SVM
            h = trainSVM(dsf, gt, **params)

            h.save(model_filename)
Example #7
0
 def absolutePathFiles(self, audioFormat, filelist):
     """Return a map {file id: absolute path} for the given filelist.

     Loads the filelist (relative paths) from the metadata directory and
     prefixes each entry with the audio directory of *audioFormat*.
     """
     flist = yaml.loadfile(join(self._basepath, 'metadata', filelist))
     # 'fid' instead of 'id' to avoid shadowing the builtin; a dict
     # comprehension replaces dict() over a needlessly listed items()
     return {fid: join(self.audioDirectory(audioFormat), fpath)
             for fid, fpath in flist.items()}
Example #8
0
            ds_harm_proc.save(ds_harm_filename)


if __name__ == '__main__':
    c = loadCollections()

    # create the working directory; ignore the error if it already exists
    try:
        os.mkdir(WORK_DIR)
    except OSError:
        pass

    # need to do some prep work before to harmonize all datasets layouts. This won't be
    # necessary anymore in the future when all is nicely generated with a single coherent
    # script, but at the moment we have to work with the data we have...
    harmonizeDatasets(c)

    # reuse cached folds when available, otherwise generate and cache them
    # (removed the unused 'cachedFolds' flag that was never read)
    foldsFile = '%s/folds.yaml' % WORK_DIR
    if os.path.exists(foldsFile):
        folds = yaml.loadfile(foldsFile)
    else:
        print('Generating folds for all collections...')
        folds = generateFolds(c, NFOLDS)
        # 'with' guarantees the cache file is closed even if dump fails
        with open(foldsFile, 'w') as f:
            yaml.dump(folds, f)

    print('Training SVM models for their corresponding folds...')
    trainSVMfolds(c, folds)

    print('Generating the evaluation datasets from the models...')
    generateEvaluationDatasets(c, folds)
Example #9
0
 def absolutePathFiles(self, audioFormat, filelist):
     """Return {file id: absolute path} for the audio files in *filelist*.

     The filelist is loaded from the metadata directory and each relative
     path is joined onto the audio directory of *audioFormat*.
     """
     flist = yaml.loadfile(join(self._basepath, "metadata", filelist))
     # use 'fid' rather than 'id' so the builtin is not shadowed
     return {fid: join(self.audioDirectory(audioFormat), fpath)
             for fid, fpath in flist.items()}
Example #10
0


if __name__ == '__main__':
    c = loadCollections()

    # create the working directory; ignore the error if it already exists
    try:
        os.mkdir(WORK_DIR)
    except OSError:
        pass

    # need to do some prep work before to harmonize all datasets layouts. This won't be
    # necessary anymore in the future when all is nicely generated with a single coherent
    # script, but at the moment we have to work with the data we have...
    harmonizeDatasets(c)

    # NOTE(review): 'cachedFolds' is assigned but never read below — looks
    # like dead code; confirm before removing
    cachedFolds = False
    # reuse previously generated folds if the cache file exists
    foldsFile = '%s/folds.yaml' % WORK_DIR
    if os.path.exists(foldsFile):
        folds = yaml.loadfile(foldsFile)
    else:
        print 'Generating folds for all collections...'
        folds = generateFolds(c, NFOLDS)
        yaml.dump(folds, open(foldsFile, 'w'))

    print 'Training SVM models for their corresponding folds...'
    trainSVMfolds(c, folds)

    print 'Generating the evaluation datasets from the models...'
    generateEvaluationDatasets(c, folds)