def get_upstream_model(args):

    print('[run_downstream] - getting upstream model:', args.upstream)

    if args.upstream == 'transformer':
        options = {
            'ckpt_file': args.ckpt,
            'load_pretrain': 'True',
            'no_grad': 'True' if not args.fine_tune else 'False',  # freeze the upstream unless fine-tuning
            'dropout': 'default',
            'spec_aug': 'False',
            'spec_aug_prev': 'True',
            'weighted_sum': 'True' if args.weighted_sum else 'False',  # learnable mix over all layers
            'select_layer': -1,  # use the last layer when weighted_sum is 'False'
        }
        upstream_model = TRANSFORMER(options, args.input_dim)
        upstream_model.permute_input = False  # take input as (B, T, D)

    elif args.upstream == 'apc':
        raise NotImplementedError

    elif args.upstream == 'baseline':
        upstream_model = dummy_upstream(args.input_dim)

    else:
        raise NotImplementedError  ######### plug in your upstream pre-trained model here #########

    assert hasattr(upstream_model, 'forward')
    assert hasattr(upstream_model, 'out_dim')
    return upstream_model
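
A minimal sketch of how this helper might be invoked; the Namespace fields mirror the attributes read above (upstream, ckpt, fine_tune, weighted_sum, input_dim), and the checkpoint path is a placeholder, not from the source:

from argparse import Namespace

args = Namespace(
    upstream='transformer',              # take the TRANSFORMER branch
    ckpt='path/to/states-1000000.ckpt',  # placeholder pre-trained checkpoint
    fine_tune=False,                     # keep the upstream frozen (no_grad='True')
    weighted_sum=False,                  # use the last layer, no learned layer mix
    input_dim=40,                        # e.g. 40-dim acoustic features
)
upstream = get_upstream_model(args)
print(upstream.out_dim)  # downstream models size their input from this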
Example #2
from distutils.util import strtobool  # parses 'True'/'False' strings from the config


def get_upstream_model(upstream_opts, upconfig, input_dim, ckpt):
    start_new = strtobool(str(upstream_opts['start_new']))
    fine_tune = strtobool(str(upstream_opts['fine_tune']))
    specaug = strtobool(str(upstream_opts['specaug']))
    encoder_feat = strtobool(str(upstream_opts['encoder_feat']))
    options = {
        'ckpt_file': ckpt,
        'load_pretrain': 'True' if not start_new else 'False',
        'no_grad': 'True' if not fine_tune else 'False',
        'dropout': 'default',
        'spec_aug': 'False',
        'spec_aug_prev': 'True' if specaug else 'False',
        'weighted_sum': 'False',
        'select_layer': -1,
        'encoder_feat': 'True' if encoder_feat else 'False'
    }
    if upconfig == 'default':
        upconfig = None
    upstream_model = TRANSFORMER(options, input_dim, config=upconfig)
    upstream_model.permute_input = False
    assert hasattr(upstream_model, 'forward')
    assert hasattr(upstream_model, 'out_dim')
    return upstream_model
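
As a sketch of calling this variant: the flags arrive as string-valued entries (hence the strtobool parsing), so a config-driven call might look like the following; the values are illustrative assumptions, not defaults from the source:

upstream_opts = {
    'start_new': 'False',     # load the pre-trained weights
    'fine_tune': 'False',     # freeze the upstream (no_grad='True')
    'specaug': 'True',        # apply SpecAugment before the upstream
    'encoder_feat': 'False',  # use the final projected features
}
upstream = get_upstream_model(upstream_opts,
                              upconfig='default',  # fall back to the model's own config
                              input_dim=40,
                              ckpt='path/to/states-1000000.ckpt')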
Example #3
    weighted_sum: str, ['True', 'False'], whether to use a learnable weighted sum to integrate the hidden representations from all layers; if 'False', the last layer is used
    select_layer: int, selects among all hidden representations; set to -1 to select the last layer (only used when weighted_sum is 'False')
"""
import torch

# TRANSFORMER, example_classifier, and get_optimizer are assumed to be provided
# by the surrounding repository; only torch is imported here.
options = {
    'ckpt_file':
    './result/result_transformer/tera/fmllrBase960-F-N-K-libri/states-1000000.ckpt',
    'load_pretrain': 'True',
    'no_grad': 'True',
    'dropout': 'default',
    'spec_aug': 'False',
    'spec_aug_prev': 'True',
    'weighted_sum': 'False',
    'select_layer': -1,
}
transformer = TRANSFORMER(options=options, inp_dim=40)
transformer.permute_input = False  # Set to False to take input as (B, T, D), otherwise take (T, B, D)

# setup your downstream class model
classifier = example_classifier(input_dim=768, hidden_dim=128,
                                class_num=2).cuda()

# construct the optimizer
params = list(transformer.named_parameters()) + list(
    classifier.named_parameters())
optimizer = get_optimizer(params=params,
                          lr=4e-3,
                          warmup_proportion=0.7,
                          training_steps=50000)

# forward
example_inputs = torch.zeros(
    3, 1200, 40
)  # dummy batch of spectrograms: (batch_size, time_step, feature_size); shapes assumed for illustration
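
The example is cut off at the forward pass; a minimal continuation, assuming the TRANSFORMER instance maps a (batch, time, input_dim) batch to (batch, time, out_dim) features that example_classifier consumes directly:

reps = transformer(example_inputs)  # assumed output: (batch_size, time_step, 768)
logits = classifier(reps.cuda())    # classifier lives on GPU (see .cuda() above)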