Ejemplo n.º 1
0
def main():
    """Generate N_DATASETS synthetic regression problems and pickle them.

    Every dataset is produced by ``make_regression`` (900 samples, 20
    features of which 10 are informative, noise 2.0) with a randomly drawn
    bias and seed, and is then split into a training part and a test part
    (one third of the samples goes to the test part, split seed 42).

    The two parts are stored as pickled ``Bunch`` objects (protocol 2) in
    DATADIR as ``dataset_<k>_train.txt`` and ``dataset_<k>_test.txt``, with
    ``k`` counting from 1.  Only the training bunch carries the true
    coefficients.
    """
    mkdir(DATADIR)
    for number in range(1, N_DATASETS + 1):
        # The bias is drawn before the seed; keep the two random() calls in
        # this order so that a seeded run yields the same datasets.
        offset = 10.0 * random()
        seed = round(random() * 1e6)
        features, targets, true_coef = make_regression(
            n_samples=900,
            n_features=20,
            n_informative=10,
            bias=offset,
            noise=2.0,
            coef=True,
            random_state=seed)
        split = train_test_split(features,
                                 targets,
                                 test_size=1.0 / 3.0,
                                 random_state=42)
        X_train, X_test, y_train, y_test = split

        parts = [
            ('train', Bunch(X=X_train, y=y_train, true_coef=true_coef)),
            ('test', Bunch(X=X_test, y=y_test)),
        ]
        for label, bunch in parts:
            filename = os.path.join(DATADIR,
                                    'dataset_%i_%s.txt' % (number, label))
            # Despite the .txt extension these are binary pickle files.
            with open(filename, 'wb') as handle:
                cPickle.dump(bunch, handle, 2)
Ejemplo n.º 2
0
def move_results(task_dict):
    """Move staged result files into the result directory tree.

    Every file found in a subdirectory of ``settings.STAGE_DIR`` is expected
    to be named ``<hash>.<ext>`` where ``<hash>`` is an integer.  The file is
    moved to ``RESULT_DIR/<dataset>/<method>/``, where dataset and method
    depend on ``settings.TYPE``:

    * ``'RAW'``: the fixed names ``'dataset'`` and ``'method'``;
    * ``'ASSESS'``: looked up through ``task_dict[hash]['dataset']``;
    * ``'CV_TT'``: looked up through the ``'train_dataset'`` and
      ``'test_dataset'`` entries of ``task_dict[hash]``.

    Files whose name does not start with an integer are skipped with a
    warning.  After a staging subdirectory has been processed it is handed
    to ``clean_empty_dir``.

    Raises ``ValueError`` when ``settings.TYPE`` is none of the types above
    (this used to surface as an ``UnboundLocalError``), and ``KeyError``
    when a hash is missing from ``task_dict``.
    """
    mkdir(settings.RESULT_DIR)
    for subdir in os.listdir(settings.STAGE_DIR):
        subpath = '%s%s%s' % (settings.STAGE_DIR, os.sep, subdir)
        for fname in os.listdir(subpath):
            # Names returned by listdir never contain a separator, so the
            # hash can be parsed from the bare file name.
            try:
                hsh = int(splitext(fname)[0])
            except ValueError:
                warning("Couldn't obtain hash from file: %s. Skipping." %
                        fname)
                continue

            if settings.TYPE == 'RAW':
                dset = 'dataset'
                method = 'method'
            elif settings.TYPE == 'ASSESS':
                dset = dataset_name(task_dict[hsh]['dataset'])
                method = task_dict[hsh]['method']
            elif settings.TYPE == 'CV_TT':
                dset = dataset_name((task_dict[hsh]['train_dataset'],
                                     task_dict[hsh]['test_dataset']))
                method = task_dict[hsh]['method']
            else:
                # Fail with a clear message instead of an unbound local.
                raise ValueError("Unsupported experiment type: %r" %
                                 (settings.TYPE,))

            outdir = '%s%s%s%s%s' % (settings.RESULT_DIR, os.sep, dset,
                                     os.sep, method)
            mkdir(outdir)
            fpath = '%s%s%s' % (subpath, os.sep, fname)
            dpath = '%s%s%s' % (outdir, os.sep, fname)
            shutil.move(fpath, dpath)
        clean_empty_dir(subpath)
Ejemplo n.º 3
0
def write_output(output, hsh):
    """Write ``output`` to ``<hsh>.txt`` below the scratch results directory.

    The ``results`` directory under the scratch directory is created via
    ``mkdir`` and the actual target directory inside it is chosen by
    ``get_output_dir``.  The file is opened in text mode and overwritten if
    it already exists.
    """
    results_root = '%s/results' % get_scratchdir()
    mkdir(results_root)
    target = '%s/%s.txt' % (get_output_dir(results_root), hsh)
    with open(target, 'w') as handle:
        handle.write(output)
Ejemplo n.º 4
0
def copy_data_file(filepath):
    """Copy a data file into the ``assets`` folder of the HTML output.

    The source location is resolved with ``get_data_path``; the copy ends up
    at ``OUTPUT_DIR/html/assets/<filepath>`` and its parent directory is
    created through ``mkdir`` first.

    Returns the path of the copy relative to the HTML directory, i.e.
    ``assets/<filepath>``.
    """
    source = get_data_path(filepath)
    relative = os.path.join('assets', filepath)
    target = os.path.join(settings.OUTPUT_DIR, 'html', relative)
    mkdir(os.path.dirname(target))
    shutil.copy(source, target)
    return relative
Ejemplo n.º 5
0
 def __init__(self,
              methods=None,
              datasets=None,
              metrics=None,
              scalars=None):
     """Set up empty bookkeeping containers and the cache file location.

     NOTE(review): the ``methods``, ``datasets``, ``metrics`` and
     ``scalars`` arguments are accepted but not used here; every collection
     starts out as an empty set regardless of what is passed in.

     The cache file is ``abed_cache.pkl`` inside ``settings.OUTPUT_DIR``,
     and that directory is passed to ``mkdir`` as part of construction.
     """
     for attribute in ('methods', 'datasets', 'metrics', 'metric_targets',
                       'scalars'):
         setattr(self, attribute, set())
     self.cache = {}
     self.cachefile = os.sep.join((settings.OUTPUT_DIR, 'abed_cache.pkl'))
     mkdir(settings.OUTPUT_DIR)
Ejemplo n.º 6
0
def get_results(basepath=None):
    """Collect result archives and log files below ``basepath``.

    When ``basepath`` is None it defaults to ``releases/current`` under
    ``myfab.project_path``.  Two transfers are requested through
    ``get_files_from_glob``:

    * ``<basepath>/bzips/*.tar.bz2`` into ``settings.ZIP_DIR``;
    * ``<basepath>/logs/*`` into ``settings.LOG_DIR``.

    Each destination directory is passed to ``mkdir`` right before its
    transfer.
    """
    root = basepath
    if root is None:
        root = '{}/releases/current'.format(myfab.project_path)

    # Compressed result archives.
    archive_dir = '{}/bzips'.format(root)
    mkdir(settings.ZIP_DIR)
    get_files_from_glob(archive_dir, '*.tar.bz2', settings.ZIP_DIR)

    # Log files.
    log_dir = '{}/logs'.format(root)
    mkdir(settings.LOG_DIR)
    get_files_from_glob(log_dir, '*', settings.LOG_DIR)
Ejemplo n.º 7
0
def get_table_fname(table, ext, _type):
    """Return the output file name for ``table`` and create its directory.

    ``_type`` selects the subdirectory of ``settings.OUTPUT_DIR`` and must
    be ``'html'`` or ``'txt'``; that directory is passed to ``mkdir``.
    ``ext`` is appended verbatim, so it should include the leading dot if
    one is wanted.

    The file name is ``ABED_<target>_<name>_<type><ext>`` for metric tables
    (``table.is_metric`` true) and ``ABED_<target>_<type><ext>`` otherwise,
    with every table attribute passed through ``clean_str``.

    Raises ``ValueError`` for any other ``_type`` (this used to fail with an
    ``UnboundLocalError`` because no output directory was selected).
    """
    if _type == 'html':
        subdir = 'html'
    elif _type == 'txt':
        subdir = 'txt'
    else:
        raise ValueError("Unknown table output type: %r" % (_type,))
    outdir = '%s%s%s' % (settings.OUTPUT_DIR, os.sep, subdir)
    mkdir(outdir)

    if table.is_metric:
        fname = '%s%sABED_%s_%s_%s%s' % (outdir, os.sep,
                                         clean_str(table.target),
                                         clean_str(table.name),
                                         clean_str(table.type), ext)
    else:
        fname = '%s%sABED_%s_%s%s' % (outdir, os.sep,
                                      clean_str(table.target),
                                      clean_str(table.type), ext)
    return fname
Ejemplo n.º 8
0
def _unpack_zip(zipfile, all_tasks):
    """Unpack one bzip2-compressed tar archive and file away its results.

    ``zipfile`` is the name of an archive inside ``settings.ZIP_DIR``.  Its
    contents are extracted into ``settings.STAGE_DIR``, after which
    ``move_results(all_tasks)`` is called and the archive name is appended
    to ``abed_unzipped.txt`` in ``settings.ZIP_DIR``.

    If the archive cannot be read as a tar file an error is reported and
    the function returns without extracting or logging anything.  Both the
    bzip2 stream and the tar handle are closed on every path, including
    when extraction fails.
    """
    fpath = '%s%s%s' % (settings.ZIP_DIR, os.sep, zipfile)
    b = bz2file.BZ2File(fpath)
    try:
        try:
            tar = tarfile.open(fileobj=b)
        except tarfile.ReadError:
            error("Could not read tarfile: %s" % fpath)
            return
        try:
            mkdir(settings.STAGE_DIR)
            # NOTE(review): extractall() trusts the member paths stored in
            # the archive; this assumes the archives come from a trusted
            # source -- confirm.
            tar.extractall(settings.STAGE_DIR)
        finally:
            tar.close()
    finally:
        # The tar handle does not own the underlying stream, so close it
        # explicitly.
        b.close()

    move_results(all_tasks)
    ziplog = settings.ZIP_DIR + os.sep + 'abed_unzipped.txt'
    with open(ziplog, 'a') as fid:
        fid.write(zipfile + '\n')
Ejemplo n.º 9
0
def get_output_dir(result_dir, quiet=False):
    """Return the numbered subdirectory of ``result_dir`` to write into.

    The entries of ``result_dir`` are interpreted as integers.  The highest
    numbered subdirectory is returned as long as it holds fewer than
    ``settings.MAX_FILES`` entries.  Otherwise the next number is used, and
    ``0`` is used when ``result_dir`` is empty; in both of these cases the
    directory is created through ``mkdir`` and, unless ``quiet`` is true,
    an info message is emitted.

    Raises ``ValueError`` if an entry of ``result_dir`` is not an integer.
    """
    entries = os.listdir(result_dir)
    if entries:
        newest = max(int(entry) for entry in entries)
        contents = os.listdir(result_dir + '/' + str(newest))
        if len(contents) < settings.MAX_FILES:
            # Still room in the most recent directory: reuse it.
            return '%s/%i' % (result_dir, newest)
        index = newest + 1
    else:
        index = 0

    outdir = '%s/%i' % (result_dir, index)
    mkdir(outdir)
    if not quiet:
        info("Created result output dir %s" % outdir)
    return outdir
Ejemplo n.º 10
0
def fab_repull():
    """Pull the result archives of all releases that belong to known jobs.

    The release directories under ``<project_path>/releases`` are listed
    with ``ls -1`` through ``myfab.run``; the ``current`` and ``previous``
    entries are ignored.  For every remaining release the job id is
    obtained from its ``logs`` directory with ``get_jobid_from_logs``, and
    the release is selected when that id occurs in ``settings.AUTO_FILE``
    (one id per line, surrounding whitespace stripped).

    For each selected release ``bzips/*.tar.bz2`` is handed to
    ``get_files_from_glob`` with ``settings.ZIP_DIR`` as destination.
    """
    releasepath = '{}/releases'.format(myfab.project_path)
    lstext = myfab.run('ls -1 {}'.format(releasepath))
    special = ('current', 'previous')
    # splitlines() handles both LF and CRLF output, so no stray carriage
    # return ends up in a name and defeats the 'special' filter.  Blank
    # entries (e.g. from empty output) are dropped as well.
    paths = [name for name in lstext.splitlines()
             if name and name not in special]

    with open(settings.AUTO_FILE, 'r') as fid:
        auto_jobids = [line.strip() for line in fid]

    to_pull = []
    for path in paths:
        fullpath = '{}/{}'.format(releasepath, path)
        logpath = '{}/{}'.format(fullpath, 'logs')
        jobid = get_jobid_from_logs(logpath)
        if jobid in auto_jobids:
            to_pull.append(fullpath)

    zip_glob = '*.tar.bz2'
    for path in to_pull:
        zip_path = '{}/bzips/'.format(path)
        mkdir(settings.ZIP_DIR)
        get_files_from_glob(zip_path, zip_glob, settings.ZIP_DIR)
Ejemplo n.º 11
0
def init_config():
    """Bootstrap a new project in the current working directory.

    Writes the default configuration template to ``./abed_conf.py``
    (overwriting an existing file), passes ``datasets`` and ``execs`` to
    ``mkdir``, and passes ``./abed_auto.txt`` and ``./abed_tasks.txt`` to
    ``touch``.  Progress is reported through ``info``.
    """
    default_config = """
##############################################################################
#                                General Settings                            #
##############################################################################
PROJECT_NAME = ''
TASK_FILE = './abed_tasks.txt'
AUTO_FILE = './abed_auto.txt'
RESULT_DIR = '/path/to/local/results'
STAGE_DIR = '/path/to/local/stagedir'
MAX_FILES = 1000
ZIP_DIR = './zips'
LOG_DIR = './logs'
OUTPUT_DIR = './output'
AUTO_SLEEP = 120
HTML_PORT = 8000
COMPRESSION = 'bzip2'

##############################################################################
#                          Server parameters and settings                    #
##############################################################################
REMOTE_NEEDS_INIT = True
REMOTE_USER = '******'
REMOTE_HOST = 'address.of.host'
REMOTE_DIR = '/home/%s/projects/project_name' % REMOTE_USER
REMOTE_PORT = 22
REMOTE_SCRATCH = None
REMOTE_SCRATCH_ENV = 'TMPDIR'

##############################################################################
#                      Settings for Master/Worker program                    #
##############################################################################
MW_SENDATONCE = 100 # number of tasks (hashes!) to send at once
MW_COPY_SLEEP = 120

##############################################################################
#                               Experiment type                              #
##############################################################################
# Uncomment the desired type
# Model assessment #
#TYPE = 'ASSESS'

# Cross validation with train and test dataset #
#TYPE = 'CV_TT'
#CV_BASESEED = 123456
#YTRAIN_LABEL = 'y_train'

# Commands defined in a text file #
#TYPE = 'RAW'
#RAW_CMD_FILE = '/path/to/file.txt'

##############################################################################
#                                Build settings                              #
##############################################################################
NEEDS_BUILD = False    # If remote compilation is required
BUILD_DIR = 'build'    # Relative directory where build takes place
BUILD_CMD = 'make all' # Build command

##############################################################################
#                      Experiment parameters and settings                    #
##############################################################################
DATADIR = 'datasets'
EXECDIR = 'execs'
DATASETS = ['dataset_1', 'dataset_2']
METHODS = ['method_1', 'method_2']
PARAMS = {
        'method_1': {
            'param_1': [val_1, val_2],
            'param_2': [val_3, val_4],
            'param_3': [val_5, val_6]
            },
        'method_2': {
            'param_1': [val_1, val_2, val_3],
            },
        }

COMMANDS = {
        'method_1': ("{execdir}/method_1 {datadir}/{dataset} {param_1} "
            "{param_2} {param_3}"),
        'method_2': "{execdir}/method_2 {datadir}/{dataset} {param_1}"
        }

METRICS = {
        'NAME_1': {
            'metric': metric_function_1,
            'best': max
            },
        'NAME_2': {
            'metric': metric_function_2,
            'best': min
            }
        }

SCALARS = {
        'time': {
            'best': min
            },
        }

RESULT_PRECISION = 4

DATA_DESCRIPTION_CSV = None

REFERENCE_METHOD = None

SIGNIFICANCE_LEVEL = 0.05

###############################################################################
#                                PBS Settings                                 #
###############################################################################
PBS_NODES = 1
PBS_WALLTIME = 360   # Walltime in minutes
PBS_CPUTYPE = None
PBS_CORETYPE = None
PBS_PPN = None
PBS_MODULES = ['mpicopy', 'python/2.7.9']
PBS_EXPORTS = ['PATH=$PATH:/home/%s/.local/bin/abed' % REMOTE_USER]
PBS_MPICOPY = ['datasets', EXECDIR, TASK_FILE]
PBS_TIME_REDUCE = 600 # Reduction of runtime in seconds

"""
    config_path = './abed_conf.py'
    with open(config_path, 'w') as handle:
        handle.write(default_config)
    info("Wrote initial config to %s." % config_path)

    # Directories referenced by the template's DATADIR / EXECDIR defaults.
    for dirname in ('datasets', 'execs'):
        mkdir(dirname)
    info("Created 'datasets' and 'execs' directories")

    # Files referenced by the template's AUTO_FILE / TASK_FILE defaults.
    for filename in ('./abed_auto.txt', './abed_tasks.txt'):
        touch(filename)
    info("Created 'abed_auto.txt' and 'abed_tasks.txt'")