def move_results(task_dict):
    """Move staged result files into the result directory tree.

    Every file found one level below ``settings.STAGE_DIR`` is moved to
    ``<settings.RESULT_DIR>/<dataset>/<method>/<filename>``. The base name of
    each file (without extension) must parse as an integer hash; files for
    which it does not are skipped with a warning. After the files of a stage
    subdirectory have been processed, ``clean_empty_dir`` is called on it.

    How the target directory is chosen depends on ``settings.TYPE``:

    * ``'RAW'``: the literal names ``'dataset'`` and ``'method'`` are used.
    * ``'ASSESS'``: dataset and method are looked up in ``task_dict[hash]``
      under the keys ``'dataset'`` and ``'method'``.
    * ``'CV_TT'``: as for ``'ASSESS'``, but the dataset name is derived from
      the ``('train_dataset', 'test_dataset')`` pair.

    :param task_dict: mapping from integer hash to a task description
        (indexed with the string keys listed above).
    :raises ValueError: if ``settings.TYPE`` is not one of the values above.
        Previously this case surfaced as an UnboundLocalError on ``dset``.
    :raises KeyError: if a file's hash is missing from ``task_dict`` for the
        ``'ASSESS'`` and ``'CV_TT'`` types.
    """
    mkdir(settings.RESULT_DIR)
    for subdir in os.listdir(settings.STAGE_DIR):
        subpath = os.path.join(settings.STAGE_DIR, subdir)
        for fname in os.listdir(subpath):
            fpath = os.path.join(subpath, fname)
            try:
                hsh = int(splitext(basename(fpath))[0])
            except ValueError:
                warning("Couldn't obtain hash from file: %s. Skipping." %
                        basename(fpath))
                continue

            if settings.TYPE == 'RAW':
                dset = 'dataset'
                method = 'method'
            elif settings.TYPE == 'ASSESS':
                task = task_dict[hsh]
                dset = dataset_name(task['dataset'])
                method = task['method']
            elif settings.TYPE == 'CV_TT':
                task = task_dict[hsh]
                dset = dataset_name((task['train_dataset'],
                                     task['test_dataset']))
                method = task['method']
            else:
                # Fail with a clear message instead of an UnboundLocalError
                # on ``dset`` further down.
                raise ValueError(
                    "Unknown TYPE setting: %r. Expected 'RAW', 'ASSESS' or "
                    "'CV_TT'." % (settings.TYPE,))

            outdir = os.path.join(settings.RESULT_DIR, dset, method)
            mkdir(outdir)
            shutil.move(fpath, os.path.join(outdir, fname))
        # Called once per stage subdirectory, after its files were handled.
        clean_empty_dir(subpath)
def compress_dataset(dset):
    """Pack the result directory of a dataset into a compressed tar archive.

    The archive is written next to the dataset directory as
    ``<settings.RESULT_DIR>/<dataset name>.tar.<ext>``, where the extension
    follows ``settings.COMPRESSION`` ('bzip2' -> bz2, 'gzip' -> gz,
    'lzma' -> xz). The dataset directory itself is left in place by this
    function.

    :param dset: dataset identifier; it is passed to ``dataset_name`` to
        obtain the directory name.
    :raises SystemExit: if ``settings.COMPRESSION`` is unknown, if the
        external ``tar`` command fails, or if lzma is requested on a
        non-posix platform.
    """
    dsetname = dataset_name(dset)
    dsetpath = os.path.join(settings.RESULT_DIR, dsetname)
    dsetpath = dsetpath.rstrip(os.sep)

    if settings.COMPRESSION == 'bzip2':
        extension = 'bz2'
    elif settings.COMPRESSION == 'gzip':
        extension = 'gz'
    elif settings.COMPRESSION == 'lzma':
        extension = 'xz'
    else:
        error("Unknown compression algorithm specified in "
              "COMPRESSION configuration. Please check the "
              "configuration file.")
        raise SystemExit

    output_filename = '%s.tar.%s' % (dsetpath, extension)

    # lzma will be available in tarfile when abed is ported to Python 3. On
    # posix systems we can try compressing with the tar command.
    if os.name == 'posix' and settings.COMPRESSION == 'lzma':
        cmd = ('XZ_OPT=-9 tar --directory=%s -Jcf %s %s' %
               (settings.RESULT_DIR, output_filename, dsetname))
        try:
            check_output(cmd, stderr=STDOUT, shell=True)
        except CalledProcessError:
            # Include the failing command; the placeholder used to be logged
            # verbatim because the format argument was missing.
            error("There was an error executing '%s'." % cmd)
            raise SystemExit
    elif settings.COMPRESSION == 'lzma':
        error("lzma compression is not yet available for your platform.")
        raise SystemExit
    else:
        mode = 'w:%s' % extension
        with tarfile.open(output_filename, mode, compresslevel=9) as tar:
            # Store entries relative to the dataset directory name, not the
            # full path on disk.
            tar.add(dsetpath, arcname=os.path.basename(dsetpath))
def __init__(self, hsh=None, dataset=None, method=None):
    """Initialise the instance with its identifiers and empty containers.

    :param hsh: stored unchanged as ``self.hsh``.
    :param dataset: passed through ``dataset_name``; the returned value is
        stored as ``self.dataset``.
    :param method: stored unchanged as ``self.method``.
    """
    # Identifying information supplied by the caller.
    self.hsh = hsh
    self.method = method
    self.dataset = dataset_name(dataset)

    # Containers that start out empty.
    self.results = {}
    self.scalars = set()
    self.metrics = set()
    self.metric_targets = set()
def files_w_dataset(dataset):
    """Yield the path of every file stored for a dataset, over all methods.

    The dataset directory is ``<settings.RESULT_DIR>/<dataset name>``; each
    entry in it is treated as a method directory and each entry in a method
    directory is yielded as ``<dataset dir>/<method>/<file>``.

    Because this is a generator, the existence check (and therefore the
    exception) only happens when iteration starts.

    :param dataset: dataset identifier, passed to ``dataset_name``.
    :raises AbedDatasetdirNotFoundException: if the dataset directory is not
        listed in ``settings.RESULT_DIR``. The directory name is passed to
        the exception, as ``files_w_dset_and_method`` does.
    """
    dset = dataset_name(dataset)
    if dset not in os.listdir(settings.RESULT_DIR):
        raise AbedDatasetdirNotFoundException(dset)
    dpath = '%s%s%s' % (settings.RESULT_DIR, os.sep, dset)
    for method in os.listdir(dpath):
        mpath = '%s%s%s' % (dpath, os.sep, method)
        for f in os.listdir(mpath):
            yield '%s%s%s' % (mpath, os.sep, f)
def walk_hashes():
    """Yield the hashes found in the result directory for all datasets.

    For every dataset in ``settings.DATASETS`` two sources are consulted:
    a directory named after the dataset (via ``walk_dir_hashes``) and the
    first entry in ``settings.RESULT_DIR`` whose name starts with
    ``'<dataset name>.tar'`` (via ``walk_archive_hashes``). Either, both or
    neither may be present.
    """
    entries = os.listdir(settings.RESULT_DIR)
    for dataset in iter_progress(settings.DATASETS):
        dset = dataset_name(dataset)

        # Plain directory with results.
        if dset in entries:
            for found in walk_dir_hashes(dataset, dset):
                yield found

        # Archive with results; only the first matching entry is used.
        prefix = '%s.tar' % dset
        archive = next(
            (entry for entry in entries if entry.startswith(prefix)), None)
        if archive is not None:
            for found in walk_archive_hashes(dataset, dset, archive):
                yield found
def files_w_dset_and_method(dataset, method):
    """Yield the path of every file stored for one dataset/method pair.

    Paths have the form ``<settings.RESULT_DIR>/<dataset name>/<method>/<file>``.
    As this is a generator, the checks below run when iteration starts.

    :param dataset: dataset identifier, passed to ``dataset_name``.
    :param method: name of the method directory inside the dataset directory.
    :raises AbedDatasetdirNotFoundException: if the dataset directory is not
        listed in ``settings.RESULT_DIR``.
    :raises AbedMethoddirNotFoundException: if ``method`` is not listed in
        the dataset directory.
    """
    dset = dataset_name(dataset)
    if dset not in os.listdir(settings.RESULT_DIR):
        raise AbedDatasetdirNotFoundException(dset)

    dataset_dir = '%s%s%s' % (settings.RESULT_DIR, os.sep, dset)
    if method not in os.listdir(dataset_dir):
        raise AbedMethoddirNotFoundException(method)

    method_dir = '%s%s%s' % (dataset_dir, os.sep, method)
    for entry in os.listdir(method_dir):
        yield '%s%s%s' % (method_dir, os.sep, entry)
def walk_for_cache(ac):
    """Yield the 4-tuples produced by walking all datasets' results.

    For every dataset in ``settings.DATASETS`` the items produced by
    ``walk_directory`` (when a directory named after the dataset exists in
    ``settings.RESULT_DIR``) and by ``walk_archive`` (for the first entry
    whose name starts with ``'<dataset name>.tar'``) are yielded unchanged.

    :param ac: forwarded as-is to ``walk_directory`` and ``walk_archive``.
    """
    entries = os.listdir(settings.RESULT_DIR)
    for dataset in iter_progress(settings.DATASETS):
        dset = dataset_name(dataset)

        # Plain directory with results.
        if dset in entries:
            for item in walk_directory(dataset, dset, ac):
                d, m, f, h = item
                yield d, m, f, h

        # Archive with results; only the first matching entry is used.
        prefix = '%s.tar' % dset
        archive = next(
            (entry for entry in entries if entry.startswith(prefix)), None)
        if archive is not None:
            for item in walk_archive(dataset, dset, archive, ac):
                d, m, f, h = item
                yield d, m, f, h
def walk_tar(tar, ac):
    """Yield ``(dataset, method, fileobj, hash)`` for members not yet in ``ac``.

    Only regular-file members are considered. The hash is taken from the
    member name via ``hash_from_filename``; members for which
    ``ac.has_result(hash)`` is true are skipped. The first and second
    '/'-separated components of the member name are matched against
    ``settings.DATASETS`` (by ``dataset_name``) and ``settings.METHODS``;
    when no match is found the corresponding value is ``None``.

    :param tar: iterable archive object providing ``extractfile``.
    :param ac: object providing ``has_result(hash)``.
    """
    for member in tar:
        if not member.isreg():
            continue
        hsh = hash_from_filename(member.name)
        if ac.has_result(hsh):
            continue

        handle = tar.extractfile(member)
        components = member.name.split('/')

        # First path component: the dataset directory name.
        top_dir = components[0]
        dataset = None
        for candidate in settings.DATASETS:
            if dataset_name(candidate) == top_dir:
                dataset = candidate
                break

        # Second path component: the method directory name.
        method_dir = components[1]
        method = None
        for candidate in settings.METHODS:
            if candidate == method_dir:
                method = candidate
                break

        yield dataset, method, handle, hsh