Beispiel #1
0
def walk_dir_hashes(dataset, dset):
    """Yield a hash for every result file stored on disk for ``dset``.

    The directory ``<settings.RESULT_DIR>/<dset>`` is expected to contain one
    sub-directory per method. Only sub-directories whose name appears in
    ``settings.METHODS`` are visited, in the order given by that setting.

    Args:
        dataset: Not used by this function; kept for interface compatibility.
        dset: Name of the dataset directory below ``settings.RESULT_DIR``.

    Yields:
        Whatever ``hash_from_filename`` returns for the full path of each
        entry found in a method directory.

    Raises:
        OSError: Propagated from ``os.listdir`` when a directory cannot be
            listed (e.g. the dataset directory does not exist).
    """
    dpath = '%s%s%s' % (settings.RESULT_DIR, os.sep, dset)
    # List the dataset directory once instead of once per method: the listing
    # does not depend on the loop variable.
    present = set(os.listdir(dpath))
    for method in settings.METHODS:
        if method not in present:
            continue
        mpath = '%s%s%s' % (dpath, os.sep, method)
        for fname in os.listdir(mpath):
            yield hash_from_filename('%s%s%s' % (mpath, os.sep, fname))
Beispiel #2
0
def walk_directory(dataset, dset, ac):
    """Yield open result files of ``dset`` that ``ac`` has no result for.

    The directory ``<settings.RESULT_DIR>/<dset>`` is scanned for
    sub-directories named after the entries of ``settings.METHODS``. Every
    entry inside such a sub-directory is hashed with ``hash_from_filename``;
    entries for which ``ac.has_result(hash)`` is falsy are opened and yielded.

    Args:
        dataset: Value passed through unchanged as the first element of every
            yielded tuple.
        dset: Name of the dataset directory below ``settings.RESULT_DIR``.
        ac: Object exposing ``has_result(hsh)``.

    Yields:
        Tuples ``(dataset, method, fid, hsh)`` where ``fid`` is a file object
        opened in text read mode. The generator never closes ``fid``; the
        consumer is responsible for closing it.

    Raises:
        OSError: Propagated from ``os.listdir`` when a directory cannot be
            listed.
        IOError: Propagated from ``open`` when a file cannot be opened.
    """
    dpath = '%s%s%s' % (settings.RESULT_DIR, os.sep, dset)
    # List the dataset directory once instead of once per method: the listing
    # does not depend on the loop variable.
    present = set(os.listdir(dpath))
    for method in settings.METHODS:
        if method not in present:
            continue
        mpath = '%s%s%s' % (dpath, os.sep, method)
        for fname in os.listdir(mpath):
            fpath = '%s%s%s' % (mpath, os.sep, fname)
            hsh = hash_from_filename(fpath)
            if ac.has_result(hsh):
                continue
            # Ownership of the handle is handed to the consumer, so it is
            # deliberately not wrapped in a ``with`` block here.
            fid = open(fpath, 'r')
            yield dataset, method, fid, hsh
Beispiel #3
0
def walk_archive_hashes(dataset, dset, fname):
    """Yield a hash for every regular file stored in a result archive.

    The archive ``<settings.RESULT_DIR>/<fname>`` is opened according to its
    file name suffix: ``bz2`` and ``gz`` are handled by ``tarfile`` directly,
    anything else is first opened through ``lzma``.

    Args:
        dataset: Not used by this function; kept for interface compatibility.
        dset: Not used by this function; kept for interface compatibility.
        fname: File name of the archive, relative to ``settings.RESULT_DIR``.

    Yields:
        Whatever ``hash_from_filename`` returns for the member name of each
        regular file in the archive.

    Raises:
        Exceptions from ``tarfile.open`` / ``lzma.open`` are propagated when
        the archive is missing or cannot be read.
    """
    fpath = os.path.join(settings.RESULT_DIR, fname)
    lzma_stream = None
    try:
        if fname.endswith('bz2'):
            tar = tarfile.open(fpath, 'r:bz2')
        elif fname.endswith('gz'):
            tar = tarfile.open(fpath, 'r:gz')
        else:
            lzma_stream = lzma.open(fpath, 'r')
            tar = tarfile.open(fileobj=lzma_stream)
        try:
            for tarinfo in tar:
                if tarinfo.isreg():
                    yield hash_from_filename(tarinfo.name)
        finally:
            # Runs on exhaustion, on error, and when the generator is closed
            # early, so the archive handle is never leaked.
            tar.close()
    finally:
        # TarFile.close() does not close a caller-supplied fileobj, so the
        # lzma stream has to be released separately.
        if lzma_stream is not None:
            lzma_stream.close()
Beispiel #4
0
def walk_tar(tar, ac):
    """Yield archive members of ``tar`` that ``ac`` has no result for.

    Only regular-file members are considered. Each member name is hashed with
    ``hash_from_filename``; members for which ``ac.has_result(hash)`` is
    truthy are skipped. The first two ``/``-separated components of the member
    name are matched against ``settings.DATASETS`` (via ``dataset_name``) and
    ``settings.METHODS`` respectively.

    Args:
        tar: Iterable of tar members that also provides ``extractfile``.
        ac: Object exposing ``has_result(hsh)``.

    Yields:
        Tuples ``(dataset, method, fid, hsh)``. ``dataset`` and ``method`` are
        the first matching entries of the respective settings, or ``None``
        when nothing matches; ``fid`` is the result of ``tar.extractfile``.

    Raises:
        IndexError: If a selected member name contains no ``/``.
    """
    for member in tar:
        if member.isreg():
            digest = hash_from_filename(member.name)
            if ac.has_result(digest):
                continue
            handle = tar.extractfile(member)
            components = member.name.split('/')

            # First path component identifies the dataset.
            dset_component = components[0]
            dataset = None
            for candidate in settings.DATASETS:
                if dataset_name(candidate) == dset_component:
                    dataset = candidate
                    break

            # Second path component identifies the method.
            method_component = components[1]
            method = None
            for candidate in settings.METHODS:
                if candidate == method_component:
                    method = candidate
                    break

            yield dataset, method, handle, digest
Beispiel #5
0
def dataset_completed(dsetfiles, dset, task_dict):
    """Return whether the files in ``dsetfiles`` cover all tasks of ``dset``.

    The tasks belonging to ``dset`` are selected from ``task_dict`` according
    to ``settings.TYPE``:

    * ``'ASSESS'``: tasks whose ``'dataset'`` entry equals ``dset``.
    * ``'CV_TT'``: tasks whose ``'train_dataset'`` equals ``dset[0]`` and
      whose ``'test_dataset'`` equals ``dset[1]``.

    Args:
        dsetfiles: Iterable of file names, each passed to
            ``hash_from_filename``.
        dset: Dataset identifier; indexed as a pair when ``settings.TYPE`` is
            ``'CV_TT'``.
        task_dict: Mapping from task hash to a task description mapping.

    Returns:
        ``True`` if the set of hashes derived from ``dsetfiles`` equals the
        set of keys of the selected tasks, ``False`` otherwise.

    Raises:
        SystemExit: If ``settings.TYPE`` is neither ``'ASSESS'`` nor
            ``'CV_TT'`` (after reporting the problem through ``error``).
    """
    if settings.TYPE == 'ASSESS':
        def belongs(task):
            return task['dataset'] == dset
    elif settings.TYPE == 'CV_TT':
        def belongs(task):
            return (task['train_dataset'] == dset[0]
                    and task['test_dataset'] == dset[1])
    else:
        error("Compressing data not supported for TYPE = %s" % settings.TYPE)
        raise SystemExit
    # ``items()`` is available on both Python 2 and Python 3 dictionaries,
    # whereas ``iteritems()`` only exists on Python 2.
    need_hashes = {key for key, task in task_dict.items() if belongs(task)}
    have_hashes = {hash_from_filename(f) for f in dsetfiles}
    return have_hashes == need_hashes