Code Example #1
File: test_pipeline.py  Project: isi-nlp/rtg
def test_freeze_pipeline():
    exp = Experiment('experiments/sample-exp', read_only=True)
    exp.config['trainer'].update(dict(steps=50, check_point=25))
    # train (unfreeze) only these components; 'encoder:0' and 'decoder:0,1'
    # select individual layer indices, everything else stays frozen
    trainable = {
        'include': ['src_embed', 'tgt_embed', 'generator', 'encoder:0', 'decoder:0,1']
    }
    exp.config['optim']['trainable'] = trainable
    pipe = Pipeline(exp)
    pipe.run(run_tests=False)
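
The snippets on this page are excerpted from the project's test suite, so their imports are not shown. The sketch below lists the names they rely on; the exact module paths are an assumption based on the rtg package layout and may differ between versions, and sanity_check_experiment is a helper defined inside the test file itself rather than a library API.

import shutil
import tempfile

# Assumed import paths (may vary by rtg version):
from rtg.pipeline import Pipeline, Experiment
from rtg.exp import load_conf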
Code Example #2
def test_finetune_pipeline_transformer():
    codec_lib = 'nlcodec'
    tmp_dir = tempfile.mkdtemp()
    print(f"Testing finetune transformer: {tmp_dir}")
    config = load_conf('experiments/sample-exp/conf.yml')
    prep = config['prep']
    prep.update(
        dict(codec_lib=codec_lib,
             char_coverage=0.9995,
             finetune_src=prep['train_src'],
             finetune_tgt=prep['train_tgt']))
    exp = Experiment(tmp_dir, config=config, read_only=False)
    exp.config['trainer'].update(
        dict(steps=50,
             check_point=25,
             finetune_steps=100,
             batch_size=400,
             split_ratio=0.1,
             dynamic_epoch=True))
    Pipeline(exp).run()
    assert exp.train_file.exists() or exp.train_db.exists()
    assert exp.finetune_file.exists()
    # TODO: add more assertions

    print(f"Cleaning up {tmp_dir}")
    shutil.rmtree(tmp_dir, ignore_errors=True)
Code Example #3
File: test_pipeline.py  Project: isi-nlp/rtg
def test_robertamt_2layer_init():
    tmp_dir = tempfile.mkdtemp()
    config = load_conf('experiments/pretrained/robertamt-xlmr-2layer.yml')
    model_id = config['model_args']['model_id']
    print(f"Testing {model_id} --> {tmp_dir}")
    assert 'pretrainmatch' == config['prep'].get('codec_lib')
    exp = Experiment(tmp_dir, config=config, read_only=False)
    exp.config['trainer'].update(dict(steps=4, check_point=1))
    Pipeline(exp).run(run_tests=False)
    sanity_check_experiment(exp)
    print(f"Cleaning up {tmp_dir}")
    shutil.rmtree(tmp_dir, ignore_errors=True)
Code Example #4
File: test_pipeline.py  Project: isi-nlp/rtg
def test_parent_child_pipeline():
    parent_dir = tempfile.mkdtemp()
    # parent_dir = 'tmp-xyz-parent'

    print(f"Making parent at {parent_dir}")
    exp = Experiment(parent_dir,
                     config='experiments/transformer.test.yml',
                     read_only=False)
    exp.config['trainer'].update(dict(steps=50, check_point=25))
    Pipeline(exp).run(run_tests=False)
    sanity_check_experiment(exp)
    assert not exp.parent_model_state.exists()

    child_config = load_conf('experiments/transformer.test.yml')
    child_config.update({
        'parent': {
            'experiment': str(parent_dir),
            'vocab': {
                'shared': 'shared'
            },
            'model': {
                'ensemble': 2
            }
        }
    })

    child_dir = tempfile.mkdtemp()
    # child_dir = 'tmp-xyz-child'
    print(f"Making child at {child_dir}")
    exp = Experiment(child_dir, config=child_config, read_only=False)
    exp.config['trainer'].update(dict(steps=50, check_point=25))
    Pipeline(exp).run(run_tests=False)
    sanity_check_experiment(exp)
    assert exp.parent_model_state.exists()

    for exp_dir in [parent_dir, child_dir]:  # avoid shadowing the builtin `dir`
        print(f"Cleaning up {exp_dir}")
        shutil.rmtree(exp_dir, ignore_errors=True)
Code Example #5
File: test_pipeline.py  Project: isi-nlp/rtg
def test_pipeline_transformer():
    for codec_lib in ['sentpiece', 'nlcodec']:
        tmp_dir = tempfile.mkdtemp()
        config = load_conf('experiments/transformer.test.yml')
        print(f"Testing {codec_lib} --> {tmp_dir}")
        config['prep'].update({
            'codec_lib': codec_lib,
            'char_coverage': 0.9995
        })
        exp = Experiment(tmp_dir, config=config, read_only=False)
        exp.config['trainer'].update(dict(steps=50, check_point=25))
        exp.config['prep']['num_samples'] = 0
        Pipeline(exp).run(run_tests=False)
        sanity_check_experiment(exp)
        print(f"Cleaning up {tmp_dir}")
        shutil.rmtree(tmp_dir, ignore_errors=True)
Code Example #6
def test_spark_prep():
    tmp_dir = tempfile.mkdtemp()
    try:
        print(f"Testing dataprep on pyspark: {tmp_dir}")
        config = load_conf('experiments/spark-bigdataprep.yml')
        exp = Experiment(tmp_dir, config=config, read_only=False)
        exp.config['trainer'].update(
            dict(steps=50, check_point=25, batch_size=400))
        Pipeline(exp).run()
        assert exp._prepared_flag.exists()
        assert exp._trained_flag.exists()
        assert exp.train_file.exists() or exp.train_db.exists()
        sanity_check_experiment(exp)
    finally:
        print(f"Cleaning up {tmp_dir}")
        shutil.rmtree(tmp_dir, ignore_errors=True)
Code Example #7
File: test_pipeline.py  Project: isi-nlp/rtg
def test_prepared_pipeline():
    exp = Experiment('experiments/sample-exp', read_only=True)
    exp.config['trainer'].update(dict(steps=50, check_point=25))
    pipe = Pipeline(exp)
    pipe.run(run_tests=False)
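
Each example is an ordinary pytest function. Assuming the functions live in test_pipeline.py and the experiments/ sample configs they reference are present, a single test can be run programmatically, for example:

import pytest

# Hypothetical invocation: run just one of the tests above, with stdout shown (-s)
pytest.main(['-s', '-k', 'test_pipeline_transformer', 'test_pipeline.py'])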