Exemple #1
0
    # Parse the command-line options; `parser` is built outside this excerpt.
    args = parser.parse_args()
    print 'Parsed args'

    # Run directories to sweep, obtained from args.run_dir.
    # (get_run_dirs is defined elsewhere; its exact semantics are not visible
    # here.)
    run_dirs = get_run_dirs(args.run_dir)

    # Sweep every run directory: delete the ones without a config file and
    # kill the process of any run that is neither alive nor has a sentinel.
    for d in run_dirs:
        alive = False
        log_file = pjoin(d, 'train.log')
        cfg_file = pjoin(d, 'cfg.json')

        if not os.path.exists(cfg_file):
            # Definitely delete it
            # NOTE: with no cfg.json the whole directory tree is removed,
            # irreversibly, and the loop moves on to the next run.
            shutil.rmtree(d)
            continue

        # Presumably true when train.log was modified within the last hour
        # (max_dur_sec = 3600 seconds) -- confirm against file_alive.
        alive = file_alive(log_file, max_dur_sec=60 * 60)

        # Act only on runs that are not alive and have no 'sentinel' file
        # (the sentinel presumably marks a finished run -- TODO confirm).
        if not alive and not os.path.exists(pjoin(d, 'sentinel')):
            run = os.path.basename(d)
            print 'loading config'
            print cfg_file
            cfg = load_config(cfg_file)
            print 'loaded config'
            # The config must provide the 'host' and 'pid' keys read below;
            # a missing key raises KeyError.
            host = cfg['host']
            pid = cfg['pid']
            print 'Killing run %s, PID %s on %s' % (run, cfg['pid'],
                                                    cfg['host'])
            # run_cpu_job is defined elsewhere; presumably it executes the
            # given shell command on `host` -- verify before relying on it.
            # Kill children (due to async data loader)
            # (pkill -P matches processes whose parent PID is `pid`.)
            run_cpu_job(host, 'pkill -TERM -P %s' % pid)
            # Kill process
            run_cpu_job(host, 'kill -9 %s' % pid)
Exemple #2
0
def process_run_dir(run_dir, figs=False):
    """Collect summary information about one training run directory.

    Reads the small status files stored in ``run_dir`` and returns them as a
    dict. Optionally refreshes the run's plot image as well.

    Args:
        run_dir: Path of the run directory to inspect.
        figs: When true and ``params.pk`` exists in ``run_dir``, regenerate
            ``plot.png`` with ``plot_results.py`` if it is missing or older
            than ``params.pk``. If the module-level ``args.viewer_dir`` is
            set, the plot is also copied to ``<viewer_dir>/plots/<run>.png``.

    Returns:
        ``None`` when ``run_dir`` has no ``cfg.json``; otherwise a dict with:

        * ``'epoch'``: int read from the ``epoch`` file, or ``-1`` if absent.
        * ``'cost'``: float read from ``last_cost`` (only if the file exists).
        * ``'alive'``: an HTML ``<span>`` string describing the run state.
        * ``'complete'``: whether a ``sentinel`` file exists.
        * ``'run'``: base name of ``run_dir``.
        * ``'num_files'``: raw contents of ``num_files`` (only if it exists).

        ``read_cfg`` is also given the dict and may add further entries.
    """
    print(run_dir)
    run_data = dict()

    # Config file: without it the directory is not treated as a run.
    cfg_file = pjoin(run_dir, 'cfg.json')
    if not fexists(cfg_file):
        print('No config file in %s' % run_dir)
        return

    # Get epoch (-1 means no epoch file has been written).
    epoch_file = pjoin(run_dir, 'epoch')
    if os.path.exists(epoch_file):
        with open(epoch_file, 'r') as f:
            epoch = int(f.read().strip())
    else:
        epoch = -1
    run_data['epoch'] = epoch

    last_cost_file = pjoin(run_dir, 'last_cost')
    if os.path.exists(last_cost_file):
        with open(last_cost_file, 'r') as f:
            run_data['cost'] = float(f.read())

    # Alive / not: judged from the training log with a one-hour window.
    log_file = pjoin(run_dir, 'train.log')
    alive = file_alive(log_file, max_dur_sec=60 * 60)

    # Complete / not
    complete = os.path.exists(pjoin(run_dir, 'sentinel'))

    # The 'alive' entry is stored as ready-to-render HTML; a completed run is
    # always shown as a grey "False" regardless of the log state.
    if complete:
        alive_html = "<span style='background:#ccc;'>False</span>"
    elif alive:
        alive_html = "<span style='background:#6d6;color:#fff'>True</span>"
    else:
        alive_html = "<span style='background:#d66;color:#fff'>False</span>"
    run_data['alive'] = alive_html
    run_data['complete'] = complete

    run_data['run'] = os.path.basename(run_dir)
    num_files_file = pjoin(run_dir, 'num_files')
    if os.path.exists(num_files_file):
        with open(num_files_file, 'r') as f:
            run_data['num_files'] = f.read()

    read_cfg(cfg_file, run_data)

    # TODO Load CER and WER

    params_file = pjoin(run_dir, 'params.pk')
    if figs and os.path.exists(params_file):
        plot_file = pjoin(run_dir, 'plot.png')

        # Regenerate only if the plot is missing or older than the params.
        if (not os.path.exists(plot_file)
                or last_modified(plot_file) < last_modified(params_file)):
            print('%s modified, generating plot' % params_file)
            try:
                # An argument list (no shell) keeps paths containing spaces
                # or shell metacharacters intact.
                check_call(['python', 'plot_results.py', run_dir,
                            '--out_file', plot_file])
            except Exception as exc:
                # Plotting is best-effort: report the failure and carry on.
                print('Plot generation failed for %s: %s' % (run_dir, exc))

        # `args` is the module-level parsed command line.
        if args.viewer_dir:
            plot_dir = pjoin(args.viewer_dir, 'plots')
            if not os.path.exists(plot_dir):
                os.makedirs(plot_dir)
            if os.path.exists(plot_file):
                shutil.copyfile(plot_file,
                                pjoin(plot_dir, '%s.png' % run_data['run']))

    return run_data
Exemple #3
0
    # Parse the command-line options; `parser` is built outside this excerpt.
    args = parser.parse_args()
    print 'Parsed args'

    # Run directories to sweep, obtained from args.run_dir.
    # (get_run_dirs is defined elsewhere; its exact semantics are not visible
    # here.)
    run_dirs = get_run_dirs(args.run_dir)

    # Sweep every run directory: delete the ones without a config file and
    # kill the process of any run that is neither alive nor has a sentinel.
    for d in run_dirs:
        alive = False
        log_file = pjoin(d, 'train.log')
        cfg_file = pjoin(d, 'cfg.json')

        if not os.path.exists(cfg_file):
            # Definitely delete it
            # NOTE: with no cfg.json the whole directory tree is removed,
            # irreversibly, and the loop moves on to the next run.
            shutil.rmtree(d)
            continue

        # Presumably true when train.log was modified within the last hour
        # (max_dur_sec = 3600 seconds) -- confirm against file_alive.
        alive = file_alive(log_file, max_dur_sec=60*60)

        # Act only on runs that are not alive and have no 'sentinel' file
        # (the sentinel presumably marks a finished run -- TODO confirm).
        if not alive and not os.path.exists(pjoin(d, 'sentinel')):
            run = os.path.basename(d)
            print 'loading config'
            print cfg_file
            cfg = load_config(cfg_file)
            print 'loaded config'
            # The config must provide the 'host' and 'pid' keys read below;
            # a missing key raises KeyError.
            host = cfg['host']
            pid = cfg['pid']
            print 'Killing run %s, PID %s on %s' % (run, cfg['pid'], cfg['host'])
            # run_cpu_job is defined elsewhere; presumably it executes the
            # given shell command on `host` -- verify before relying on it.
            # Kill children (due to async data loader)
            # (pkill -P matches processes whose parent PID is `pid`.)
            run_cpu_job(host, 'pkill -TERM -P %s' % pid)
            # Kill process
            run_cpu_job(host, 'kill -9 %s' % pid)