Example #1
0
def s3_get(args):
    '''
    Download one S3 key to a local file.

    args is a single (bucket, source, destin) tuple:
      bucket - name of the S3 bucket to read from
      source - name of the key inside that bucket
      destin - local filename the key contents are written to

    The three values arrive packed in one positional argument, exactly as
    with the former tuple-parameter signature, so existing callers are
    unaffected (presumably this is used with a one-argument map function;
    confirm against callers).

    If anything fails, destin is passed to rm_rf and the original
    exception is re-raised.
    '''
    # Explicit unpacking replaces the Python-2-only tuple parameter syntax.
    bucket, source, destin = args
    import boto
    try:
        print('starting {}'.format(source))
        s3_bucket = boto.connect_s3().get_bucket(bucket)
        # NOTE(review): boto's get_key presumably returns None for a missing
        # key, which would surface on the next line as an AttributeError.
        key = s3_bucket.get_key(source)
        key.get_contents_to_filename(destin)
        print('finished {}'.format(source))
    except:
        # Deliberately bare: clean up on *any* interruption (including
        # KeyboardInterrupt), then let the exception propagate.
        rm_rf(destin)
        raise
Example #2
0
def s3_get(args):
    '''
    Fetch a single key from S3 into a local file.

    args is one (bucket, source, destin) tuple:
      bucket - S3 bucket name
      source - key name within the bucket
      destin - local path that receives the key contents

    The values are still passed as one positional tuple, matching the
    previous tuple-parameter signature, so callers need no change.

    On failure, destin is handed to rm_rf and the exception is re-raised.
    '''
    # Unpack by hand; tuple parameters in the signature are Python-2-only.
    bucket, source, destin = args
    import boto
    try:
        print('starting {}'.format(source))
        s3_bucket = boto.connect_s3().get_bucket(bucket)
        # NOTE(review): get_key presumably yields None when the key does not
        # exist, in which case the download call raises AttributeError.
        key = s3_bucket.get_key(source)
        key.get_contents_to_filename(destin)
        print('finished {}'.format(source))
    except:
        # Bare on purpose: remove the destination on any error or
        # interrupt, then propagate the original exception.
        rm_rf(destin)
        raise
Example #3
0
def load_checkpoint(name=None, period_sec=5, debug=False):
    '''
    Grab last full checkpoint for profiling.

    Runs loom.runner.infer repeatedly with checkpointing enabled, one
    numbered working directory per step, until a loaded checkpoint reports
    `finished`.  The files of the checkpoint two steps before the final one
    are then moved into the results root and all numbered step directories
    are removed.

    Arguments:
      name       - dataset name, passed to loom.store.require and get_paths
      period_sec - value written to the config as
                   schedule.checkpoint_period_sec
      debug      - forwarded to loom.runner.infer

    Raises AssertionError('too few checkpoints') if the directory for
    step - 2 does not exist once inference has finished.
    '''
    loom.store.require(name, ['samples.0.init', 'samples.0.shuffled'])
    inputs, results = get_paths(name, 'checkpoints')

    # Start from an empty results directory and work inside it, so the
    # relative step directories ('0', '1', ...) live under results['root'].
    rm_rf(results['root'])
    mkdir_p(results['root'])
    with chdir(results['root']):

        config = {'schedule': {'checkpoint_period_sec': period_sec}}
        loom.config.config_dump(config, results['samples'][0]['config'])

        # run first iteration
        step = 0
        mkdir_p(str(step))
        kwargs = checkpoint_files(step, '_out')
        print('running checkpoint {}, tardis_iter 0'.format(step))
        loom.runner.infer(
            config_in=results['samples'][0]['config'],
            rows_in=inputs['samples'][0]['shuffled'],
            tares_in=inputs['ingest']['tares'],
            model_in=inputs['samples'][0]['init'],
            log_out=results['samples'][0]['infer_log'],
            debug=debug,
            **kwargs)
        checkpoint = _load_checkpoint(step)

        # find penultimate checkpoint
        while not checkpoint.finished:
            # Only the most recent few step directories are kept around.
            rm_rf(str(step - 3))
            step += 1
            print('running checkpoint {}, tardis_iter {}'.format(
                step,
                checkpoint.tardis_iter))
            # Resume from the previous step's outputs, write to this step's.
            kwargs = checkpoint_files(step - 1, '_in')
            mkdir_p(str(step))
            kwargs.update(checkpoint_files(step, '_out'))
            loom.runner.infer(
                config_in=results['samples'][0]['config'],
                rows_in=inputs['samples'][0]['shuffled'],
                tares_in=inputs['ingest']['tares'],
                log_out=results['samples'][0]['infer_log'],
                debug=debug,
                **kwargs)
            checkpoint = _load_checkpoint(step)

        print('final checkpoint {}, tardis_iter {}'.format(
            step,
            checkpoint.tardis_iter))

        last_full = str(step - 2)
        assert os.path.exists(last_full), 'too few checkpoints'
        # NOTE(review): this reloads the final step, so the tardis_iter
        # printed below belongs to the final checkpoint, not to last_full.
        # Confirm whether last_full was intended here.
        checkpoint = _load_checkpoint(step)
        print('saving checkpoint {}, tardis_iter {}'.format(
            last_full,
            checkpoint.tardis_iter))
        # Promote the kept checkpoint's files to the results root, then
        # drop every numbered step directory.
        for f in checkpoint_files(last_full).values():
            shutil.move(f, results['root'])
        for f in glob.glob(os.path.join(results['root'], '[0-9]*/')):
            shutil.rmtree(f)
Example #4
0
def clean(name):
    '''
    Remove the stored files of a single dataset.

    Looks up the dataset's paths with loom.store.get_paths(name) and hands
    the 'root' entry to rm_rf.
    '''
    paths = loom.store.get_paths(name)
    dataset_root = paths['root']
    rm_rf(dataset_root)
Example #5
0
def load_checkpoint(name=None, period_sec=5, debug=False):
    '''
    Grab last full checkpoint for profiling, or list available datasets.

    With name=None, calls list_options_and_exit(MODEL).  Otherwise runs
    loom.runner.infer repeatedly with checkpointing enabled, one numbered
    working directory per step, until a loaded checkpoint reports
    `finished`; the directory of the checkpoint two steps before the final
    one is then moved to CHECKPOINTS.format(name).

    Arguments:
      name       - dataset name, substituted into ROWS, MODEL and
                   CHECKPOINTS
      period_sec - value written to the config as
                   schedule.checkpoint_period_sec
      debug      - forwarded to loom.runner.infer; also disables tempdir's
                   cleanup_on_error

    Raises AssertionError if the dataset's rows or model file is missing
    ('First load dataset'), or if the directory for step - 2 does not exist
    once inference has finished ('too few checkpoints').
    '''
    if name is None:
        list_options_and_exit(MODEL)

    rows = ROWS.format(name)
    model = MODEL.format(name)
    assert os.path.exists(model), 'First load dataset'
    assert os.path.exists(rows), 'First load dataset'

    # Discard any previous result and make sure its parent directory exists.
    destin = CHECKPOINTS.format(name)
    rm_rf(destin)
    mkdir_p(os.path.dirname(destin))

    # Named with a leading underscore so it no longer shadows this function.
    def _load_checkpoint(step):
        checkpoint = loom.schema_pb2.Checkpoint()
        with open_compressed(checkpoint_files(step)['checkpoint']) as f:
            checkpoint.ParseFromString(f.read())
        return checkpoint

    # The relative step directories below are created while this context
    # is active (presumably tempdir changes into a scratch directory;
    # confirm against its definition).
    with tempdir(cleanup_on_error=(not debug)):

        config = {'schedule': {'checkpoint_period_sec': period_sec}}
        config_in = os.path.abspath('config.pb.gz')
        loom.config.config_dump(config, config_in)

        # run first iteration
        step = 0
        mkdir_p(str(step))
        kwargs = checkpoint_files(str(step), '_out')
        print('running checkpoint {}, tardis_iter 0'.format(step))
        loom.runner.infer(
            config_in=config_in,
            rows_in=rows,
            model_in=model,
            debug=debug,
            **kwargs)
        checkpoint = _load_checkpoint(step)

        # find penultimate checkpoint
        while not checkpoint.finished:
            # Only the most recent few step directories are kept around.
            rm_rf(str(step - 3))
            step += 1
            print('running checkpoint {}, tardis_iter {}'.format(
                step,
                checkpoint.tardis_iter))
            # Resume from the previous step's outputs, write to this step's.
            kwargs = checkpoint_files(step - 1, '_in')
            mkdir_p(str(step))
            kwargs.update(checkpoint_files(step, '_out'))
            loom.runner.infer(
                config_in=config_in,
                rows_in=rows,
                debug=debug,
                **kwargs)
            checkpoint = _load_checkpoint(step)

        print('final checkpoint {}, tardis_iter {}'.format(
            step,
            checkpoint.tardis_iter))

        last_full = str(step - 2)
        assert os.path.exists(last_full), 'too few checkpoints'
        # NOTE(review): this reloads the final step, so the tardis_iter
        # printed below belongs to the final checkpoint, not to last_full.
        # Confirm whether last_full was intended here.
        checkpoint = _load_checkpoint(step)
        print('saving checkpoint {}, tardis_iter {}'.format(
            last_full,
            checkpoint.tardis_iter))
        shutil.move(last_full, destin)
Example #6
0
def clean(name):
    '''
    Delete everything stored for one dataset.

    The dataset's 'root' path, as reported by loom.store.get_paths(name),
    is passed to rm_rf.
    '''
    dataset_paths = loom.store.get_paths(name)
    root_dir = dataset_paths['root']
    rm_rf(root_dir)