Ejemplo n.º 1
0
def runNode(node, job, opts):
    """Launch a non-blocking decode of a single file on ``node``.

    Builds a shell command line that changes into the dataset utilities
    directory, sources ``~/.bashrc`` and runs ``../runDecode.py`` for the
    file index ``job``, then hands that command line to ``run_cpu_job`` with
    stdout redirected to a per-job log file under ``/tmp``.

    Args:
        node: Passed straight through to ``run_cpu_job``.
        job: Integer file index; used for ``--start_file``, as the numeric
            suffix of ``--out_file``, and in the log file name.
        opts: Options object providing ``dataDir``, ``alisDir`` and
            ``out_file``.

    Returns:
        None.
    """
    # Fall back to dataDir when alisDir is empty or unset.
    alisDir = opts.alisDir if opts.alisDir else opts.dataDir

    # Create decoding command for each file
    cmd = ('%s ../runDecode.py --dataDir %s --alisDir %s --numFiles 1 '
           '--start_file %d --out_file %s.%d'
           % (PYTHON_CMD, opts.dataDir, alisDir, job, opts.out_file, job))
    print(cmd)

    setup = 'cd %s/../%s-utils; source ~/.bashrc' % (CLUSTER_DIR, DATASET)
    # Use exactly one ';' between the setup and the decode command: an empty
    # command ("; ;") is a syntax error in bash and nothing would run.
    full_cmd = '%s; %s' % (setup, cmd)
    print(full_cmd)

    log_file = '/tmp/%s_decode%s.log' % (DATASET, job)
    # NOTE(review): the log handle is not closed here because the job is
    # started with blocking=False and presumably keeps writing after return.
    run_cpu_job(node, full_cmd, stdout=open(log_file, 'w'), blocking=False)
    return None
Ejemplo n.º 2
0
def runNode(node, job, opts):
    """Start the decode of one file on ``node`` without waiting for it.

    The command line changes into the dataset utilities directory, sources
    ``~/.bashrc`` and runs ``../runDecode.py`` for file index ``job``. It is
    submitted through ``run_cpu_job`` with stdout sent to a per-job log file
    under ``/tmp``.

    Args:
        node: Passed straight through to ``run_cpu_job``.
        job: Integer file index; used for ``--start_file``, as the numeric
            suffix of ``--out_file``, and in the log file name.
        opts: Options object providing ``dataDir``, ``alisDir`` and
            ``out_file``.

    Returns:
        None.
    """
    # Fall back to dataDir when alisDir is empty or unset.
    alisDir = opts.alisDir if opts.alisDir else opts.dataDir

    # Create decoding command for each file
    cmd = '%s ../runDecode.py --dataDir %s --alisDir %s --numFiles 1 --start_file %d --out_file %s.%d' % (
        PYTHON_CMD, opts.dataDir, alisDir, job, opts.out_file, job)
    print(cmd)

    # Chain the steps with a single '; ' each. Two consecutive separators
    # ("; ;") form an empty command, which bash rejects as a syntax error.
    steps = [
        'cd %s/../%s-utils' % (CLUSTER_DIR, DATASET),
        'source ~/.bashrc',
        cmd,
    ]
    full_cmd = '; '.join(steps)
    print(full_cmd)

    log_file = '/tmp/%s_decode%s.log' % (DATASET, job)
    # NOTE(review): the log handle stays open on purpose; with blocking=False
    # the job presumably outlives this call and still needs it.
    run_cpu_job(node, full_cmd, stdout=open(log_file, 'w'), blocking=False)
    return None
Ejemplo n.º 3
0
    # Sweep every run directory: remove those without a config, and kill
    # runs whose training log is no longer alive and that have no sentinel.
    for d in run_dirs:
        alive = False
        log_file = pjoin(d, 'train.log')
        cfg_file = pjoin(d, 'cfg.json')

        # A directory without cfg.json is removed unconditionally.
        if not os.path.exists(cfg_file):
            shutil.rmtree(d)
            continue

        # file_alive presumably checks that the log was updated within
        # max_dur_sec (one hour here) -- confirm against its definition.
        alive = file_alive(log_file, max_dur_sec=3600)

        # Proceed only when the run is not alive and no 'sentinel' file exists.
        if not (alive or os.path.exists(pjoin(d, 'sentinel'))):
            run = os.path.basename(d)
            print('loading config')
            print(cfg_file)
            cfg = load_config(cfg_file)
            print('loaded config')
            host = cfg['host']
            pid = cfg['pid']
            print('Killing run %s, PID %s on %s' % (run, pid, host))

            # Kill children first (due to async data loader), then the
            # process itself.
            run_cpu_job(host, 'pkill -TERM -P %s' % pid)
            run_cpu_job(host, 'kill -9 %s' % pid)

            # Optionally remove the directory of the killed run.
            if args.clear_dirs:
                print('Clearing %s' % d)
                shutil.rmtree(d)
Ejemplo n.º 4
0
    # Walk all run directories, deleting config-less ones and killing runs
    # that are not alive and carry no sentinel file.
    for d in run_dirs:
        alive = False
        log_file = pjoin(d, 'train.log')
        cfg_file = pjoin(d, 'cfg.json')

        if not os.path.exists(cfg_file):
            # No cfg.json: definitely delete the directory and move on.
            shutil.rmtree(d)
            continue

        # NOTE(review): file_alive is defined elsewhere; it is called with a
        # one-hour max_dur_sec on the training log.
        alive = file_alive(log_file, max_dur_sec=60 * 60)

        if not alive:
            if not os.path.exists(pjoin(d, 'sentinel')):
                run = os.path.basename(d)
                print('loading config')
                print(cfg_file)
                cfg = load_config(cfg_file)
                print('loaded config')
                host = cfg['host']
                pid = cfg['pid']
                print('Killing run %s, PID %s on %s' % (run, pid, host))

                # Children go first (due to async data loader) ...
                run_cpu_job(host, 'pkill -TERM -P %s' % pid)
                # ... then the process itself.
                run_cpu_job(host, 'kill -9 %s' % pid)

                if args.clear_dirs:
                    print('Clearing %s' % d)
                    shutil.rmtree(d)