Example #1
def fairseq_train(
    preprocessed_dir,
    exp_dir,
    ngpus=1,
    batch_size=8192,  # Batch size across all gpus (taking update freq into account)
    max_sentences=64,  # Max sentences per GPU
    arch='transformer',
    save_interval_updates=100,
    max_update=50000,
    lr=0.001,
    warmup_updates=4000,
    dropout=0.1,
    lr_scheduler='inverse_sqrt',
    criterion='label_smoothed_cross_entropy',
    seed=None,
    fp16=True,
    **kwargs,
):
    exp_dir = Path(exp_dir)
    preprocessed_dir = Path(preprocessed_dir)
    with log_std_streams(exp_dir / 'fairseq_train.stdout'):
        exp_dir.mkdir(exist_ok=True, parents=True)
        # Copy dictionaries to exp_dir for generation
        for dict_path in preprocessed_dir.glob('dict.*.txt'):
            shutil.copy(dict_path, exp_dir)
        checkpoints_dir = exp_dir / 'checkpoints'
        total_real_batch_size = max_sentences * ngpus
        update_freq = int(round(batch_size / total_real_batch_size, 0))
        if seed is None:
            seed = random.randint(0, 1000)
        distributed_port = random.randint(10000, 20000)
        args = f'''
        {preprocessed_dir} --task translation --source-lang complex --target-lang simple --save-dir {checkpoints_dir}
        --optimizer adam --adam-betas '(0.9, 0.98)'
        --criterion {criterion} --label-smoothing 0.1
        --lr-scheduler {lr_scheduler} --lr {lr} --warmup-updates {warmup_updates} --update-freq {update_freq}
        --arch {arch} --dropout {dropout} --weight-decay 0.0 --clip-norm 0.1 --share-all-embeddings
        --no-epoch-checkpoints --save-interval 999999 --validate-interval 999999
        --max-update {max_update} --save-interval-updates {save_interval_updates} --keep-interval-updates 1 --patience 10
        --batch-size {max_sentences} --seed {seed}
        --distributed-world-size {ngpus} --distributed-port {distributed_port}
        '''
        if lr_scheduler == 'inverse_sqrt':
            args += ' --warmup-init-lr 1e-07'
        if fp16:
            args += ' --fp16'
        # FIXME: if the kwargs are already present in the args string, they will appear twice but fairseq will take only the last one into account
        args += f' {args_dict_to_str(kwargs)}'
        args = remove_multiple_whitespaces(args.replace('\n', ' ')).strip(' ')
        # Recover lost quotes around adam betas
        args = re.sub(r'--adam-betas (\(0\.\d+, 0\.\d+\))',
                      r"--adam-betas '\1'", args)
        print(f'fairseq-train {args}')
        with mock_cli_args(shlex.split(args)):
            train.cli_main()
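
The effective batch size in this example comes from gradient accumulation: update_freq is derived from the requested global batch_size, the per-GPU max_sentences, and the GPU count. A minimal standalone sketch of that arithmetic (the function name compute_update_freq is made up for illustration):

def compute_update_freq(batch_size, max_sentences, ngpus):
    # Sentences processed per optimizer step without accumulation
    total_real_batch_size = max_sentences * ngpus
    # Number of forward/backward passes to accumulate per parameter update
    return int(round(batch_size / total_real_batch_size))

# With the defaults above: 8192 / (64 * 1) gives update_freq == 128,
# i.e. an effective batch of 64 sentences * 1 GPU * 128 accumulation steps = 8192.
print(compute_update_freq(8192, 64, 1))  # 128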
Example #2
#!/usr/bin/env python3 -u
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
from comet_ml import Experiment  # unused directly; comet_ml is typically imported before the training libs for its logging side effects
from fairseq_cli.train import cli_main


if __name__ == '__main__':
    cli_main()
Example #3
import pathlib
import sys

from fairseq_cli.train import cli_main


def ls_cli_main(*args, **kwargs):
    user_path = pathlib.Path(__file__).parent.joinpath("fs_modules")
    sys.argv.extend(["--user-dir", str(user_path)])
    cli_main(*args, **kwargs)
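
In effect, ls_cli_main is fairseq-train with fairseq's --user-dir flag pre-set to the repository's fs_modules directory, so any custom models or tasks registered there become available. A hypothetical invocation (the data directory and architecture are placeholders, not taken from the example):

import sys

# Placeholders throughout; this is equivalent to running
#   fairseq-train data-bin/demo --arch transformer --user-dir <repo>/fs_modules
sys.argv = ['fairseq-train', 'data-bin/demo', '--arch', 'transformer']
ls_cli_main()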
Example #4
        '--lr', '0.0005',
        '-s', 'src', '-t', 'tgt',
        '--label-smoothing', '0.1',
        '--dropout', '0.3',
        '--max-tokens', '4000',
        '--min-lr', '1e-09',
        '--lr-scheduler', 'inverse_sqrt',
        '--weight-decay', '0.0001',
        '--criterion', 'label_smoothed_cross_entropy',
        '--max-update', '150000',
        '--warmup-updates', '4000',
        '--warmup-init-lr', '1e-07',
        '--adam-betas', '(0.9,0.98)',
        '--max-source-positions', '10240',
        '--save-dir', 'checkpoints/transformer',
        # '--dataset-impl', 'raw',
        '--share-all-embeddings',

        # '--encoder-embed-dim', '64',
        # '--encoder-ffn-embed-dim', '128',
        # '--encoder-attention-heads', '2',
        # '--encoder-layers', '2',
        # '--decoder-embed-dim', '64',
        # '--decoder-ffn-embed-dim', '128',
        # '--decoder-attention-heads', '2',
        # '--decoder-layers', '2'
    ]
    train.cli_main()
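
A common way such an argument list is wired into fairseq is to patch sys.argv before calling train.cli_main(), which parses sys.argv when invoked without arguments. A minimal self-contained sketch of that pattern (the data directory, architecture, and values are placeholders, not taken from the snippet above):

import sys

from fairseq_cli import train

# Placeholder data directory and hyperparameters; only the wiring pattern matters here.
sys.argv = [
    'fairseq-train',
    'data-bin/demo',
    '--arch', 'transformer',
    '--optimizer', 'adam',
    '--lr', '0.0005',
    '--max-tokens', '4000',
]
train.cli_main()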
Example #5
    args_dict['current_host'] = os.environ['SM_CURRENT_HOST']
    args_dict['distributed-world-size'] = str(len(args_dict['hosts']) * num_gpus)
    os.environ['WORLD_SIZE'] = str(len(args_dict['hosts']) * num_gpus)

    os.environ['RANK'] = str(args_dict['hosts'].index(args_dict['current_host']) * num_gpus)
    args_dict.pop('hosts', None)
    args_dict.pop('current_host', None)

    args_dict['restore-file'] = os.path.join(args_dict['pretrained_path'], 'pretrained_model.pt')
    args_dict.pop('pretrained_path', None)

    train_dir = args_dict['train']
    args_dict.pop('train', None)

    args_dict.pop('ngpus', None)

    try:
        prefix = '/opt/ml/'
        param_path = os.path.join(prefix, 'input/config/hyperparameters.json')
        # Read in any hyperparameters that the user passed with the training job
        with open(param_path, 'r') as tc:
            training_params = json.load(tc)

        for k, v in training_params.items():
            args_dict[k] = v
    except Exception:
        print("hyperparameters.json not found! Probably running without SageMaker!")
    training_args = [train_dir,] + convert_args_dict_to_list(args_dict) + unparsed

    cli_main(training_args)
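
convert_args_dict_to_list is not defined in this example; a plausible sketch of what it does, flattening the hyperparameter dict into fairseq-style CLI tokens (an assumption, not the example's actual helper):

def convert_args_dict_to_list(args_dict):
    # Assumed behaviour: {'lr': '0.001', 'max-update': '50000'}
    # becomes ['--lr', '0.001', '--max-update', '50000'].
    args = []
    for key, value in args_dict.items():
        args.append('--' + str(key))
        args.append(str(value))
    return args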
Example #6
#!/usr/local/opt/python/bin/python3.7
# -*- coding: utf-8 -*-
import re
import sys
from os import path
fairseq_path = path.abspath(path.join(path.abspath(__file__), '../../fairseq'))
sys.path.insert(0, fairseq_path)
from fairseq_cli.train import cli_main

if __name__ == '__main__':
    sys.argv[0] = re.sub(r'(-script\.pyw?|\.exe)?$', '', sys.argv[0])
    sys.exit(cli_main())
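
The re.sub on sys.argv[0] strips the wrapper suffixes that pip/setuptools console scripts can add on Windows, so the program name fairseq reports is clean. A small standalone illustration (the script names are made-up examples):

import re

for argv0 in ('fairseq-train-script.py', 'fairseq-train-script.pyw',
              'fairseq-train.exe', 'fairseq-train'):
    print(re.sub(r'(-script\.pyw?|\.exe)?$', '', argv0))
# Every case prints 'fairseq-train'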