# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
# details.
#
# You should have received a copy of the Affero GNU General Public License     
# version 3 along with this program. If not, see http://www.gnu.org/licenses/



from __future__ import with_statement
import sys
from os.path import join, abspath
from optparse import OptionParser
from gaia2 import filedir
import gaia2.fastyaml as yaml

# Text of the project template, read once at import time from the directory
# returned by filedir(). The context manager closes the file handle right
# away instead of leaving that to the garbage collector.
with open(join(filedir(), 'classification_project_template.yaml')) as _template_file:
    PROJECT_TEMPLATE = _template_file.read()



def generateProject():
    parser = OptionParser(usage = '%prog [options] groundtruth_file filelist_file project_file datasets_dir results_dir')

    options, args = parser.parse_args()

    try:
        groundtruth_file = args[0]
        filelist_file = args[1]
        project_file = args[2]
        datasets_dir = args[3]
        results_dir = args[4]
    except:
Exemple #2
0
# You should have received a copy of the Affero GNU General Public License     
# version 3 along with this program. If not, see http://www.gnu.org/licenses/


from __future__ import print_function
from __future__ import with_statement
import os, sys
from os.path import join, abspath, splitext, exists
from optparse import OptionParser
import gaia2.fastyaml as yaml
import gaia2.mtgdb
from gaia2.classification import GroundTruth
from gaia2 import filedir


# Text of the 2.1-beta5 project template, read once at import time from the
# directory returned by filedir(). The context manager closes the file handle
# right away instead of leaving that to the garbage collector.
with open(join(filedir(), 'classification_project_template_2.1-beta5.yaml')) as _template_file:
    PROJECT_TEMPLATE = _template_file.read()

def validEqloud(f):
    """Return True when `f` ends with '.sig' but not with '.neq.sig'."""
    # Every '.neq.sig' name also ends with '.sig', so rule that case out first.
    if f.endswith('.neq.sig'):
        return False
    return f.endswith('.sig')

def validNeqloud(f):
    """Return True when `f` ends with the '.neq.sig' suffix."""
    suffix = '.neq.sig'
    return f.endswith(suffix)


def sigfileListFromCollection(collection, sigfiles_dir):
    # generate the filelist for the .sig files, as what we have in the metadata/ folder is
    # the filelist for the audio files
    # NB: we only take the first audio format available here (still true?)
    audioFiles = collection.relativePathFiles()

    # NB: we only take the files which are present in the groundtruth, as the others are
Exemple #3
0
# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
# details.
#
# You should have received a copy of the Affero GNU General Public License
# version 3 along with this program. If not, see http://www.gnu.org/licenses/

from __future__ import with_statement
import os, sys
from os.path import join, abspath, splitext, exists
from optparse import OptionParser
import gaia2.fastyaml as yaml
import gaia2.mtgdb
from gaia2.classification import GroundTruth
from gaia2 import filedir

# Text of the project template, read once at import time from the directory
# returned by filedir(). The context manager closes the file handle right
# away instead of leaving that to the garbage collector.
with open(join(filedir(),
               'classification_project_template.yaml')) as _template_file:
    PROJECT_TEMPLATE = _template_file.read()


def validEqloud(f):
    """Tell whether `f` has the '.sig' suffix without being a '.neq.sig' name."""
    has_sig_suffix = f.endswith('.sig')
    has_neq_suffix = f.endswith('.neq.sig')
    return has_sig_suffix and not has_neq_suffix


def validNeqloud(f):
    """Tell whether `f` carries the '.neq.sig' suffix."""
    neq_suffix = '.neq.sig'
    return f.endswith(neq_suffix)


def sigfileListFromCollection(collection, sigfiles_dir):
    # generate the filelist for the .sig files, as what we have in the metadata/ folder is
    # the filelist for the audio files
    # NB: we only take the first audio format available here (still true?)
Exemple #4
0
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
# details.
#
# You should have received a copy of the Affero GNU General Public License     
# version 3 along with this program. If not, see http://www.gnu.org/licenses/



import gaia2
import gaia2.fastyaml as yaml
import environment
import collection
from os.path import join

# Path of the YAML file describing the collections, inside the directory
# returned by gaia2.filedir().
COLLECTIONS_FILE = join(gaia2.filedir(), 'mtgdb_collections.yaml')

# Maps each entry's 'name' to the entry itself. The file is read through a
# context manager so its handle is closed as soon as the text has been loaded.
with open(COLLECTIONS_FILE) as _collections_file:
    ALL_MTGDB_COLLECTIONS = dict((c['name'], c) for c in yaml.load(_collections_file.read()))


class MtgdbCollection(collection.Collection):
    """An mtgdb.MtgdbCollection instance is a collection on the MTG-DB server that
    has been classified as stable and may be accessed directly by its name.

    The name must be one of the keys of ALL_MTGDB_COLLECTIONS.
    """

    def __init__(self, name, groundTruth = None):
        """Look up `name` in ALL_MTGDB_COLLECTIONS and initialise the base class.

        The base class receives the entry's 'location' joined onto
        environment.MTGDB_AUDIO, together with `groundTruth` passed through
        unchanged. The given name is then stored as `self.name`.

        Raises:
            ValueError: if `name` is not a key of ALL_MTGDB_COLLECTIONS.
        """
        try:
            # Called `entry` rather than `collection` so that the imported
            # `collection` module is not shadowed inside this method.
            entry = ALL_MTGDB_COLLECTIONS[name]
        except KeyError:
            # list(...) keeps the message a plain list on Python 3 as well,
            # where dict.keys() would be rendered as "dict_keys([...])".
            raise ValueError('Collection "%s" is not known by Gaia. Available collections are: %s' % (name, list(ALL_MTGDB_COLLECTIONS)))

        super(MtgdbCollection, self).__init__(join(environment.MTGDB_AUDIO, entry['location']), groundTruth)
        self.name = name
def generate_project(groundtruth_file,
                     filelist_file,
                     project_file,
                     datasets_dir,
                     results_dir,
                     seed=None,
                     cluster_mode=False,
                     template=None,
                     force_consistency=False):
    """Write a classification project file from a groundtruth and a filelist.

    Both input files are loaded with yaml.load(). The groundtruth must provide
    the keys 'className' and 'groundTruth', and the track ids of 'groundTruth'
    must be exactly the keys of the filelist.

    Args:
        groundtruth_file: path of the groundtruth file.
        filelist_file: path of the filelist file.
        project_file: path of the project file to write.
        datasets_dir: written to the project as an absolute path under
            'datasetsDirectory'.
        results_dir: written to the project as an absolute path under
            'resultsDirectory'.
        seed: value substituted for 'seed'; when None, time.time() is used.
        cluster_mode: value substituted for 'clusterMode'.
        template: file name of the project template, looked up in the
            directory returned by filedir(). When not given, the name is
            derived from the version reported by get_essentia_versions()
            through VERSION_MAP, falling back to DEFAULT_VERSION.
        force_consistency: when true, fail if get_essentia_versions() reports
            more than one version for the filelist.

    Raises:
        Exception: if `force_consistency` is set and more than one version
            is found.
        SystemExit: with code 2 if the groundtruth lacks the expected keys,
            3 if duplicated track ids are detected, 4 if the groundtruth and
            filelist track ids differ.
    """
    # Context managers make sure the input files are closed once parsed.
    with open(groundtruth_file, 'r') as gt_stream:
        gt = yaml.load(gt_stream)
    try:
        className = gt['className']
        groundTruth = gt['groundTruth']
    except (KeyError, TypeError):
        # KeyError: a key is missing; TypeError: the loaded document is not
        # subscriptable by string (e.g. None or a list). Catching only these
        # keeps KeyboardInterrupt / SystemExit from being swallowed.
        print(groundtruth_file, "groundtruth file has incorrect format")
        sys.exit(2)

    with open(filelist_file, 'r') as fl_stream:
        fl = yaml.load(fl_stream)

    gt_trackids = list(groundTruth.keys())
    fl_trackids = list(fl.keys())

    # check that there are no duplicated ids
    # NOTE(review): a plain dict cannot hold duplicated keys, so these checks
    # only matter if the loader returns another mapping type -- confirm.
    if len(gt_trackids) != len(set(gt_trackids)):
        print(groundtruth_file, "contains duplicated track ids")
        sys.exit(3)

    if len(fl_trackids) != len(set(fl_trackids)):
        print(filelist_file, "contains duplicated track ids")
        sys.exit(3)

    # check if filelist is consistent with groundtruth (no files missing)
    if set(gt_trackids) != set(fl_trackids):
        print("track ids found in", groundtruth_file, "are inconsistent with",
              filelist_file)
        sys.exit(4)

    if force_consistency:
        print(
            'Checking Essentia version in the descriptor files to ensure consistency...'
        )
        versions = get_essentia_versions(fl)

        if len(versions) > 1:
            raise Exception(
                "Couldn't find a unique Essentia version in the dataset. "
                "This exception is thrown because you are using the flag `force-consistency`"
            )
        print('ok!')

    if not template:
        print('No classification project template specified.')
        essentia_version = DEFAULT_VERSION

        # With force_consistency the versions were already collected above.
        if not force_consistency:
            print(
                'Analyzing the dataset to figure out which project template file to use...'
            )
            versions = get_essentia_versions(fl)

        if len(versions) == 1:
            essentia_version = next(iter(versions))
        else:
            print("Couldn't find a unique essentia version in the dataset.")

        template_version = VERSION_MAP.get(essentia_version, DEFAULT_VERSION)

        print('Using classification project template "{}"'.format(
            template_version))
        template = 'classification_project_template_{}.yaml'.format(
            template_version)

    with open(join(filedir(), template)) as template_stream:
        project_template = template_stream.read()

    # if not seed specified, get the current clock value
    if seed is None:
        import time
        seed = time.time()

    # write the project file
    with open(project_file, 'w') as pfile:
        pfile.write(
            project_template % {
                'className': className,
                'datasetsDirectory': abspath(datasets_dir),
                'resultsDirectory': abspath(results_dir),
                'filelist': abspath(filelist_file),
                'groundtruth': abspath(groundtruth_file),
                'seed': seed,
                'clusterMode': cluster_mode
            })

    print('Successfully written', project_file)