Example #1
def write_table_ajax(table, is_summary=False):
    if is_summary:
        fname = get_table_fname(table, '_summary_ajax.txt', 'html')
    else:
        fname = get_table_fname(table, '_ajax.txt', 'html')
    with open(fname, 'w') as fid:
        fid.write("{\n")
        fid.write("  \"data\": [\n")
        # materialize the rows so the last one can be written without a trailing comma
        pairs = list(table)
        for _id, row in pairs[:-1]:
            fid.write("    [\n")
            fid.write("      \"%s\",\n" % str(_id))
            for elem in row[:-1]:
                fid.write("      \"%s\",\n" % str(elem))
            fid.write("      \"%s\"\n" % str(row[-1]))
            fid.write("    ],\n")
        _id, row = pairs[-1]
        fid.write("    [\n")
        fid.write("      \"%s\",\n" % str(_id))
        for elem in row[:-1]:
            fid.write("      \"%s\",\n" % str(elem))
        fid.write("      \"%s\"\n" % str(row[-1]))
        fid.write("    ]\n")
        fid.write("  ]\n")
        fid.write("}\n")
    info("Created output file: %s" % fname)
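A variant sketch that delegates quoting and escaping to the json module (assuming, as above, that table iterates over (id, row) pairs and that get_table_fname and info are available); it also handles empty tables without special-casing the final row:

import json

def write_table_ajax_json(table, is_summary=False):
    suffix = '_summary_ajax.txt' if is_summary else '_ajax.txt'
    fname = get_table_fname(table, suffix, 'html')
    # same payload as above: each row prefixed by its id
    data = [[str(_id)] + [str(elem) for elem in row] for _id, row in table]
    with open(fname, 'w') as fid:
        json.dump({"data": data}, fid, indent=2)
    info("Created output file: %s" % fname)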
Example #2
def cvtt_build_tables_metric(abed_cache, train_metric, test_metric, target):
    table = AbedTable()
    table.headers = ['ID'] + sorted(abed_cache.methods)
    info("Generating tables for train metric %s, test metric %s, target %s" %
         (train_metric, test_metric, target))
    for i, dset in enum_progress(sorted(abed_cache.datasets), label='Tables'):
        row = []
        for j, method in enumerate(sorted(abed_cache.methods)):
            results = list(abed_cache.iter_results_dm(dset, method))
            values = [
                r.get_result(settings.YTRAIN_LABEL, metric=train_metric)
                for r in results
            ]
            if not values:
                row.append('NaN')
                continue
            best_value = settings.METRICS[train_metric]['best'](values)
            best_results = [
                r for r in results if r.get_result(
                    settings.YTRAIN_LABEL, metric=train_metric) == best_value
            ]
            target_values = [
                r.get_result(target, metric=test_metric) for r in best_results
            ]
            target_best = settings.METRICS[test_metric]['best'](target_values)
            rounded = round(target_best, settings.RESULT_PRECISION)
            fmt = '%%.%df' % settings.RESULT_PRECISION
            row.append(fmt % rounded)
        table.add_row(dset, row)
    return table
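The heart of this function is a two-step selection: find the best training score, then report the best test score among the results that tie it. A standalone sketch of that step (plain Python with illustrative names, no abed types):

def two_step_best(scores, best_train=max, best_test=min):
    """scores: list of (train_score, test_score) pairs."""
    best_tr = best_train(tr for tr, _ in scores)
    ties = [te for tr, te in scores if tr == best_tr]
    return best_test(ties)

# e.g. two_step_best([(0.9, 1.2), (0.9, 1.1), (0.8, 0.5)]) == 1.1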
Example #3
def local(self):
    if self.task_dict is None:
        error("No tasks defined before attempted run. Exiting")
        raise SystemExit
    copy_local_files()
    mpi_start(self.task_dict, local=True)
    info("Finished with run command.")
Example #4
def update_result_cache(task_dict, skip_cache=False):
    ac = AbedCache()
    try:
        ac.load()
        info("Result cache loaded from disk.")
    except IOError:
        info("Result cache non-existent, generating it.")
        ac = init_result_cache(task_dict)
        return ac

    # User requested skip of cache regeneration
    if skip_cache:
        warning("Skipping cache regeneration check on user request.")
        return ac

    # Updating the result cache is done in two steps:
    # 1. If new metrics were added to the configuration, regenerate everything
    # 2. If only new result files were added, parse just those
    conf_metrics = set(settings.METRICS.keys())
    cache_metrics = ac.metrics
    if conf_metrics - cache_metrics:
        ac = init_result_cache(task_dict)
        return ac

    for dataset, method, fid, hsh in walk_for_cache(ac):
        result = parse_result_fileobj(fid, hsh, dataset, method)
        if result is None:
            continue
        ac.add_result(result)

    ac.dump()
    return ac
Example #5
def run(self):
    # this takes over master/worker
    if self.task_dict is None:
        error("No tasks defined before attempted run. Exiting")
        raise SystemExit
    mpi_start(self.task_dict)
    info("Finished with run command.")
Example #6
def update_tasks(self):
    # this takes over update_tasks
    cnt = update_tasks(self.task_dict)
    info("Task update removed %i completed tasks. Tasks remaining: %i" %
         (cnt, len(self.task_dict)))
    self.write_tasks()
    git_commit_tbd()
    if len(self.task_dict) == 0:
        info("All tasks completed. Cool cool cool.")
Example #7
def write_table_json(table, tabid):
    data = []
    averages = next((row for _id, row in table if _id == 'Average'), None)
    if averages is None:
        # no 'Average' row in this table; nothing to export
        return
    headers = table.headers[1:]
    for hdr, avg in zip(headers, averages):
        data.append({'name': hdr, 'time': float(avg)})
    fname = os.path.join(settings.OUTPUT_DIR, 'html', tabid + '.json')
    with open(fname, 'w') as fid:
        fid.write(json.dumps(data))
    info("Created output file: %s" % fname)
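For instance (illustrative values), a table with headers ['ID', 'method_1', 'method_2'] and an 'Average' row of ['1.2345', '0.5678'] would yield:

[{"name": "method_1", "time": 1.2345}, {"name": "method_2", "time": 0.5678}]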
Example #8
def git_commit_auto():
    try:
        check_output([
            'git', 'commit', '-m', 'automatic commit of auto log file',
            settings.AUTO_FILE
        ])
    except CalledProcessError as err:
        error("Error performing autocommit for auto log file. Error message:")
        print(err.output)
        raise SystemExit
    info("Automatic auto log file commit")
Example #9
def git_commit_tbd():
    try:
        check_output([
            'git', 'commit', '-m', 'automatic commit of TBD task file',
            settings.TASK_FILE
        ])
    except CalledProcessError as err:
        error("Error performing autocommit for TBD file. Error message:")
        print(err.output)
        raise SystemExit
    info("Automatic TBD file commit")
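Examples #8 and #9 differ only in the file committed and the log text. A hypothetical shared helper could remove the duplication (a sketch, assuming the same check_output, error, and info helpers used above):

def _git_commit_file(path, description):
    try:
        check_output(['git', 'commit', '-m',
                      'automatic commit of %s' % description, path])
    except CalledProcessError as err:
        error("Error performing autocommit for %s. Error message:" % description)
        print(err.output)
        raise SystemExit
    info("Automatic commit of %s" % description)

# git_commit_auto() becomes _git_commit_file(settings.AUTO_FILE, 'auto log file')
# git_commit_tbd() becomes _git_commit_file(settings.TASK_FILE, 'TBD task file')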
Example #10
def compress_results(task_dict):
    completed_dsets = []
    for dset in settings.DATASETS:
        try:
            files = list(files_w_dataset(dset))
        except Exception:
            # no readable result files for this dataset yet; skip it
            continue
        if dataset_completed(files, dset, task_dict):
            completed_dsets.append(dset)

    info("Starting compression of %i completed result directories." %
         len(completed_dsets))
    for dset in iter_progress(completed_dsets, 'Datasets'):
        compress_dataset(dset)
Example #11
def init_data():
    """ Push the data to the remote server """
    local('tar czf datasets.tar.gz -C {} .'.format(settings.DATADIR))
    release_time = time.strftime('%s')
    release_path = '{ppath}/{datapath}/{relpath}'.format(
        ppath=myfab.project_path, datapath='datasets', relpath=release_time)
    myfab.run('mkdir -p {releasepath}'.format(releasepath=release_path))
    myfab.put('./datasets.tar.gz', release_path)
    myfab.run('cd {} && tar xvf datasets.tar.gz'.format(release_path))
    myfab.run('cd {} && rm datasets.tar.gz'.format(release_path))
    local('rm datasets.tar.gz')
    info('Remote datasets placed in: {}'.format(release_path))
    myfab.data_path = release_path
Example #12
def init_result_cache(task_dict):
    ac = AbedCache(methods=settings.METHODS,
                   datasets=settings.DATASETS,
                   metrics=settings.METRICS,
                   scalars=settings.SCALARS)
    info("Starting cache generation")
    counter = 0
    for dataset, method, fid, hsh in walk_for_cache(ac):
        result = parse_result_fileobj(fid, hsh, dataset, method)
        if result is None:
            continue
        ac.add_result(result)
        counter += 1
    ac.dump()
    info("Read %i result files into cache." % counter)
    return ac
Example #13
def get_output_dir(result_dir, quiet=False):
    subdirs = os.listdir(result_dir)
    if not subdirs:
        outdir = '%s/0' % (result_dir)
        mkdir(outdir)
        if not quiet:
            info("Created result output dir %s" % outdir)
        return outdir
    latest = max(map(int, subdirs))
    files = os.listdir(result_dir + '/' + str(latest))
    if len(files) >= settings.MAX_FILES:
        outdir = '%s/%i' % (result_dir, latest + 1)
        mkdir(outdir)
        if not quiet:
            info("Created result output dir %s" % outdir)
    else:
        outdir = '%s/%i' % (result_dir, latest)
    return outdir
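In effect, result files fill result_dir/0 until it holds settings.MAX_FILES files, then roll over to result_dir/1, and so on. A hypothetical usage sketch:

# with MAX_FILES = 1000 and results/0 already holding 1000 files,
# this creates and returns 'results/1'
outdir = get_output_dir('results')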
Example #14
def pull(self, jobid=None):
    info("Starting pull")
    fab_pull()
    info("Starting unpacking of zips")
    unpack_zips()
    if jobid is None:
        jobid = get_jobid_from_logs()
    info("Marking job as pulled: %s" % jobid)
    mark_job(jobid)
    git_commit_auto()
    info("Updating tasks")
    self.update_tasks()
Example #15
def do_work(hsh, task, local=False):
    datadir = os.path.join(get_scratchdir(local), 'datasets')
    execdir = os.path.join(get_scratchdir(local), 'execs')
    if settings.TYPE == 'RAW':
        cmd = task.format(datadir=datadir, execdir=execdir)
    else:
        command = settings.COMMANDS[task['method']]
        task['datadir'] = datadir
        task['execdir'] = execdir
        cmd = command.format(**task)
    try:
        info("Executing: '%s'" % cmd, color_wrap=False)
        # the task is a full command line, so it runs through the shell
        output = check_output(cmd, shell=True)
    except CalledProcessError as err:
        error("There was an error executing: '%s'. Here is the error: %s" %
              (cmd, err.output), color_wrap=False)
        return
    write_output(output, hsh)
    info("Finished with %s" % hsh, color_wrap=False)
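The task argument takes one of two shapes depending on settings.TYPE. An illustration with assumed values (names follow the config template in Example #23):

# TYPE == 'RAW': the task itself is a command template
task = "{execdir}/method_1 {datadir}/dataset_1 0.5"

# any other TYPE: the task is a dict whose 'method' key selects a
# template from settings.COMMANDS
task = {'method': 'method_1', 'dataset': 'dataset_1',
        'param_1': 0.5, 'param_2': 1.0, 'param_3': 2.0}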
Example #16
def main():
    args = parse_arguments()

    skip_init = False
    if args.cmd == 'reload_tasks':
        skip_init = True
    if settings is None:
        if args.cmd != 'init':
            error("No ABED configuration file found in this directory. "
                  "Run 'abed init' to initialize one. Exiting.")
            raise SystemExit
        skip_init = True
    abed = Abed(skip_init=skip_init, skip_cache=args.skip_cache)

    info("Running abed command: %s" % args.cmd)
    try:
        getattr(abed, args.cmd)()
    except KeyboardInterrupt:
        info("Exiting.")
Example #17
def write_table_txt(table, summary_table):
    fname = get_table_fname(table, '.txt', 'txt')
    now = datetime.datetime.now()
    with open(fname, 'w') as fid:
        fid.write("%% Result file generated by ABED at %s\n" %
                  now.strftime('%c'))
        fid.write("%% Table for label: %s\n" % table.target)
        fid.write("%% Showing: %s\n" % table.type)
        if table.is_metric:
            fid.write('%% Metric: %s\n\n' % table.name)
        txttable = [[i] + r for i, r in table]
        fmt = '.%df' % settings.RESULT_PRECISION
        tabtxt = tabulate(txttable, headers=table.headers, floatfmt=fmt)
        fid.write(tabtxt)
        fid.write('\n\n')
        sumtable = [[i] + r for i, r in summary_table]
        tabtxt = tabulate(sumtable, headers=summary_table.headers,
                          floatfmt=fmt)
        fid.write(tabtxt)
    info("Created output file: %s" % fname)
Example #18
def repull(self):
    # use abed_auto.log to repull all zips from previous runs
    info("Starting repull based on {}".format(settings.AUTO_FILE))
    fab_repull()
    info("Unpacking zips")
    unpack_zips()
    info("Done repulling.")
Example #19
def submitted():
    jobid = get_jobid_from_pbs()
    if jobid is None:
        return None
    state = get_state(jobid)
    if state == QUEUED:
        sttime = get_starttime(jobid)
        if sttime:
            info("Job %s queued. Start time: %s" %
                 (jobid, sttime.strftime("%c")))
        else:
            info("Job %s queued." % jobid)
    elif state == RUNNING:
        rmtime = get_remaining(jobid)
        info("Job %s running. Time remaining: %s" % (jobid, rmtime))
    return True
Example #20
def auto(self):
    info("Starting auto loop")
    while True:
        if len(self.task_dict) == 0:
            info("Stopping auto loop")
            break
        if submitted() is None:
            info("No submitted task found, assuming done.")
            jobid = get_jobid_from_logs()
            info("Found jobid from logs: %s" % jobid)
            if not is_job_marked(jobid):
                info("Job %s not pulled yet, pulling it" % jobid)
                self.pull(jobid=jobid)
            if len(self.task_dict) == 0:
                break
            self.push()
        info("Task busy, sleeping for a while ...")
        time.sleep(settings.AUTO_SLEEP)
    info("Starting parse_results")
    self.parse_results()
Example #21
def write_tables_html(doc):
    fname = os.path.join(settings.OUTPUT_DIR, 'html',
                         AbedHTMLTypes.RANK_TESTS[-1])
    with open(fname, 'w') as fid:
        fid.write(doc)
    info('Created output file: %s' % fname)
Example #22
def push(self):
    if not git_ok():
        error("Git repository has uncommitted changes, not pushing.")
        raise SystemExit
    info("Starting push")
    fab_push()
Example #23
def init_config():
    txt = """
##############################################################################
#                                General Settings                            #
##############################################################################
PROJECT_NAME = ''
TASK_FILE = './abed_tasks.txt'
AUTO_FILE = './abed_auto.txt'
RESULT_DIR = '/path/to/local/results'
STAGE_DIR = '/path/to/local/stagedir'
MAX_FILES = 1000
ZIP_DIR = './zips'
LOG_DIR = './logs'
OUTPUT_DIR = './output'
AUTO_SLEEP = 120
HTML_PORT = 8000
COMPRESSION = 'bzip2'

##############################################################################
#                          Server parameters and settings                    #
##############################################################################
REMOTE_NEEDS_INIT = True
REMOTE_USER = '******'
REMOTE_HOST = 'address.of.host'
REMOTE_DIR = '/home/%s/projects/project_name' % REMOTE_USER
REMOTE_PORT = 22
REMOTE_SCRATCH = None
REMOTE_SCRATCH_ENV = 'TMPDIR'

##############################################################################
#                      Settings for Master/Worker program                    #
##############################################################################
MW_SENDATONCE = 100 # number of tasks (hashes!) to send at once
MW_COPY_SLEEP = 120

##############################################################################
#                               Experiment type                              #
##############################################################################
# Uncomment the desired type
# Model assessment #
#TYPE = 'ASSESS'

# Cross validation with train and test dataset #
#TYPE = 'CV_TT'
#CV_BASESEED = 123456
#YTRAIN_LABEL = 'y_train'

# Commands defined in a text file #
#TYPE = 'RAW'
#RAW_CMD_FILE = '/path/to/file.txt'

##############################################################################
#                                Build settings                              #
##############################################################################
NEEDS_BUILD = False    # If remote compilation is required
BUILD_DIR = 'build'    # Relative directory where build takes place
BUILD_CMD = 'make all' # Build command

##############################################################################
#                      Experiment parameters and settings                    #
##############################################################################
DATADIR = 'datasets'
EXECDIR = 'execs'
DATASETS = ['dataset_1', 'dataset_2']
METHODS = ['method_1', 'method_2']
PARAMS = {
        'method_1': {
            'param_1': [val_1, val_2],
            'param_2': [val_3, val_4],
            'param_3': [val_5, val_6]
            },
        'method_2': {
            'param_1': [val_1, val_2, val_3],
            },
        }

COMMANDS = {
        'method_1': ("{execdir}/method_1 {datadir}/{dataset} {param_1} "
            "{param_2} {param_3}"),
        'method_2': "{execdir}/method_2 {datadir}/{dataset} {param_1}"
        }

METRICS = {
        'NAME_1': {
            'metric': metric_function_1,
            'best': max
            },
        'NAME_2': {
            'metric': metric_function_2,
            'best': min
            }
        }

SCALARS = {
        'time': {
            'best': min
            },
        }

RESULT_PRECISION = 4

DATA_DESCRIPTION_CSV = None

REFERENCE_METHOD = None

SIGNIFICANCE_LEVEL = 0.05

###############################################################################
#                                PBS Settings                                 #
###############################################################################
PBS_NODES = 1
PBS_WALLTIME = 360   # Walltime in minutes
PBS_CPUTYPE = None
PBS_CORETYPE = None
PBS_PPN = None
PBS_MODULES = ['mpicopy', 'python/2.7.9']
PBS_EXPORTS = ['PATH=$PATH:/home/%s/.local/bin/abed' % REMOTE_USER]
PBS_MPICOPY = ['datasets', EXECDIR, TASK_FILE]
PBS_TIME_REDUCE = 600 # Reduction of runtime in seconds

"""
    configfile = './abed_conf.py'
    with open(configfile, 'w') as fid:
        fid.write(txt)
    info("Wrote initial config to %s." % configfile)
    mkdir('datasets')
    mkdir('execs')
    info("Created 'datasets' and 'execs' directories")
    touch('./abed_auto.txt')
    touch('./abed_tasks.txt')
    info("Created 'abed_auto.txt' and 'abed_tasks.txt'")
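As a rough guide, the fields a user typically edits before a first run are the project, remote, and experiment sections; a minimal sketch with placeholder values:

PROJECT_NAME = 'my_project'
REMOTE_USER = 'username'
REMOTE_HOST = 'cluster.example.com'
TYPE = 'ASSESS'                      # or 'CV_TT' / 'RAW'
DATASETS = ['my_dataset']
METHODS = ['my_method']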
Example #24
def setup(self):
    info("Starting setup")
    fab_setup()
Example #25
def write_tasks(self):
    with open(settings.TASK_FILE, 'w') as fid:
        for task in sorted(self.task_dict.keys()):
            fid.write('%s\n' % task)
    info("Wrote task file to %s" % settings.TASK_FILE)
Example #26
def write_tables_html(doc):
    fname = os.path.join(settings.OUTPUT_DIR, 'html', 'metric_tables.html')
    with open(fname, 'w') as fid:
        fid.write(doc)
    info("Created output file: %s" % fname)
Example #27
def parse_results(self):
    # this takes over parse_results.py
    info("Starting make_results()")
    make_results(self.task_dict, self.skip_cache)
Example #28
def status(self):
    info("Number of tasks to be done: %i" % len(self.task_dict))
    info("Total number of tasks defined: %i" % len(init_tasks()))