# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more # details. # # You should have received a copy of the Affero GNU General Public License # version 3 along with this program. If not, see http://www.gnu.org/licenses/ from __future__ import with_statement import sys from os.path import join, abspath from optparse import OptionParser from gaia2 import filedir import gaia2.fastyaml as yaml PROJECT_TEMPLATE = open(join(filedir(), 'classification_project_template.yaml')).read() def generateProject(): parser = OptionParser(usage = '%prog [options] groundtruth_file filelist_file project_file datasets_dir results_dir') options, args = parser.parse_args() try: groundtruth_file = args[0] filelist_file = args[1] project_file = args[2] datasets_dir = args[3] results_dir = args[4] except:
# You should have received a copy of the Affero GNU General Public License # version 3 along with this program. If not, see http://www.gnu.org/licenses/ from __future__ import print_function from __future__ import with_statement import os, sys from os.path import join, abspath, splitext, exists from optparse import OptionParser import gaia2.fastyaml as yaml import gaia2.mtgdb from gaia2.classification import GroundTruth from gaia2 import filedir PROJECT_TEMPLATE = open(join(filedir(), 'classification_project_template_2.1-beta5.yaml')).read() def validEqloud(f): return f.endswith('.sig') and not f.endswith('.neq.sig') def validNeqloud(f): return f.endswith('.neq.sig') def sigfileListFromCollection(collection, sigfiles_dir): # generate the filelist for the .sig files, as what we have in the metadata/ folder is # the filelist for the audio files # NB: we only take the first audio format available here (still true?) audioFiles = collection.relativePathFiles() # NB: we only take the files which are present in the groundtruth, as the others are
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more # details. # # You should have received a copy of the Affero GNU General Public License # version 3 along with this program. If not, see http://www.gnu.org/licenses/ from __future__ import with_statement import os, sys from os.path import join, abspath, splitext, exists from optparse import OptionParser import gaia2.fastyaml as yaml import gaia2.mtgdb from gaia2.classification import GroundTruth from gaia2 import filedir PROJECT_TEMPLATE = open(join(filedir(), 'classification_project_template.yaml')).read() def validEqloud(f): return f.endswith('.sig') and not f.endswith('.neq.sig') def validNeqloud(f): return f.endswith('.neq.sig') def sigfileListFromCollection(collection, sigfiles_dir): # generate the filelist for the .sig files, as what we have in the metadata/ folder is # the filelist for the audio files # NB: we only take the first audio format available here (still true?)
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
# details.
#
# You should have received a copy of the Affero GNU General Public License
# version 3 along with this program. If not, see http://www.gnu.org/licenses/

import gaia2
import gaia2.fastyaml as yaml
# NOTE(review): `environment` and `collection` are imported by bare name;
# presumably they are sibling modules of this file -- confirm they resolve
# under the interpreter version in use.
import environment
import collection
from os.path import join

# YAML file listing the known collections, looked up in the directory
# returned by gaia2.filedir().
COLLECTIONS_FILE = join(gaia2.filedir(), 'mtgdb_collections.yaml')

# Index the collection descriptions by their 'name' entry. The file is read
# once at import time; the `with` block makes sure the handle is closed
# instead of being left to the garbage collector.
with open(COLLECTIONS_FILE) as _collections_file:
    ALL_MTGDB_COLLECTIONS = dict((c['name'], c) for c in yaml.load(_collections_file.read()))


class MtgdbCollection(collection.Collection):
    """An mtgdb.MtgdbCollection instance is a collection on the MTG-DB server
    that has been classified as stable and may be accessed directly by its name.

    The name is resolved through ALL_MTGDB_COLLECTIONS; the 'location' entry
    of the matching description is joined onto environment.MTGDB_AUDIO and
    handed to the base Collection constructor.
    """

    def __init__(self, name, groundTruth = None):
        """Initialise the collection registered under `name`.

        Parameters:
            name: key of the collection in ALL_MTGDB_COLLECTIONS.
            groundTruth: forwarded unchanged to collection.Collection.

        Raises:
            ValueError: if `name` is not a key of ALL_MTGDB_COLLECTIONS; the
                message lists the available names.
        """
        try:
            # Deliberately not called `collection`, so the imported module of
            # that name is not shadowed inside this method.
            description = ALL_MTGDB_COLLECTIONS[name]
        except KeyError:
            # list(...) renders as a plain list on both Python 2 and 3
            # (a bare .keys() shows up as "dict_keys([...])" on Python 3).
            raise ValueError('Collection "%s" is not known by Gaia. Available collections are: %s'
                             % (name, list(ALL_MTGDB_COLLECTIONS.keys())))

        super(MtgdbCollection, self).__init__(join(environment.MTGDB_AUDIO, description['location']),
                                              groundTruth)
        self.name = name
def generate_project(groundtruth_file, filelist_file, project_file, datasets_dir, results_dir,
                     seed=None, cluster_mode=False, template=None, force_consistency=False):
    """Write a classification project file for the given ground truth and filelist.

    Parameters:
        groundtruth_file: path to a YAML file that must contain the keys
            'className' and 'groundTruth'.
        filelist_file: path to a YAML mapping whose keys are track ids.
        project_file: path of the project file to write.
        datasets_dir: directory stored (as an absolute path) under
            'datasetsDirectory' in the project file.
        results_dir: directory stored (as an absolute path) under
            'resultsDirectory' in the project file.
        seed: value substituted for 'seed'; when None, time.time() is used.
        cluster_mode: value substituted for 'clusterMode'.
        template: file name of the project template, joined onto filedir().
            When empty/None, a template is selected through VERSION_MAP from
            the Essentia version reported by get_essentia_versions().
        force_consistency: when true, raise if get_essentia_versions()
            reports more than one version for the filelist.

    Exits the process with status 2 when the ground truth file lacks the
    expected keys, 3 when duplicated track ids are detected, and 4 when the
    track ids of the ground truth and of the filelist differ.

    Raises:
        Exception: if `force_consistency` is set and more than one Essentia
            version is found.
    """
    # Context managers close the handles deterministically.
    with open(groundtruth_file, 'r') as gt_handle:
        gt = yaml.load(gt_handle)

    try:
        className = gt['className']
        groundTruth = gt['groundTruth']
    except (KeyError, TypeError):
        # KeyError: a key is missing; TypeError: the document is not a mapping
        # (e.g. empty file -> None). A bare `except:` would also swallow
        # KeyboardInterrupt / SystemExit.
        print(groundtruth_file, "groundtruth file has incorrect format")
        sys.exit(2)

    with open(filelist_file, 'r') as fl_handle:
        fl = yaml.load(fl_handle)

    gt_trackids = list(groundTruth.keys())
    fl_trackids = list(fl.keys())

    # check that there are no duplicated ids
    # NOTE(review): if yaml.load returns plain dicts their keys are already
    # unique, so these two checks presumably never fire -- confirm.
    if len(gt_trackids) != len(set(gt_trackids)):
        print(groundtruth_file, "contains duplicated track ids")
        sys.exit(3)

    if len(fl_trackids) != len(set(fl_trackids)):
        print(filelist_file, "contains duplicated track ids")
        sys.exit(3)

    # check if filelist is consistent with groundtruth (no files missing)
    if set(gt_trackids) != set(fl_trackids):
        print("track ids found in", groundtruth_file, "are inconsistent with", filelist_file)
        sys.exit(4)

    # Versions found in the dataset; filled in by at most one scan below.
    versions = None

    if force_consistency:
        print('Checking Essentia version in the descriptor files to ensure consistency...')
        versions = get_essentia_versions(fl)
        if len(versions) > 1:
            raise Exception(
                "Couldn't find a unique Essentia version in the dataset. "
                "This exception is thrown because you are using the flag `force-consistency`")
        print('ok!')

    if not template:
        print('No classification project template specified.')
        essentia_version = DEFAULT_VERSION

        if versions is None:
            # Only scan the dataset if the consistency check has not already
            # done so.
            print('Analyzing the dataset to figure out which project template file to use...')
            versions = get_essentia_versions(fl)

        if len(versions) == 1:
            essentia_version = list(versions)[0]
        else:
            print("Couldn't find a unique essentia version in the dataset.")

        template_version = VERSION_MAP.get(essentia_version, DEFAULT_VERSION)
        print('Using classification project template "{}"'.format(template_version))
        template = 'classification_project_template_{}.yaml'.format(template_version)

    with open(join(filedir(), template)) as template_handle:
        project_template = template_handle.read()

    # if not seed specified, get the current clock value
    if seed is None:
        import time
        seed = time.time()

    # write the project file
    substitutions = {
        'className': className,
        'datasetsDirectory': abspath(datasets_dir),
        'resultsDirectory': abspath(results_dir),
        'filelist': abspath(filelist_file),
        'groundtruth': abspath(groundtruth_file),
        'seed': seed,
        'clusterMode': cluster_mode,
    }
    with open(project_file, 'w') as pfile:
        pfile.write(project_template % substitutions)

    print('Successfully written', project_file)