def acronyms_finetune(args):
    args.git_hash = get_git_revision_hash()
    render_args(args)

    prev_args, bsg_model, vocab, _ = restore_model(args.bsg_experiment)

    # Load Data
    data_dir = '../eval/eval_data/minnesota/'
    sense_fp = os.path.join(data_dir, 'sense_inventory_ii')
    lfs, lf_sf_map, sf_lf_map = parse_sense_df(sense_fp)
    df = pd.read_csv(os.path.join(data_dir, 'preprocessed_dataset_window_{}.csv'.format(prev_args.window)))
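    # Map each row's target long form (LF) to its index within the sense inventory for its short form (SF);
    # target_lf_index returns -1 when the LF cannot be matched, and those rows are dropped just below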
    df['target_lf_idx'] = df['sf'].combine(df['target_lf'], lambda sf, lf: target_lf_index(lf, sf_lf_map[sf]))
    prev_N = df.shape[0]
    df = df[df['target_lf_idx'] > -1]
    print('Removed {} examples for which the target LF is not exactly in the sense inventory ii'.format(
        prev_N - df.shape[0]))

    sfs = df['sf'].unique().tolist()
    used_sf_lf_map = defaultdict(list)
    dominant_sfs = set()

    for sf in sfs:
        subset_df = df[df['sf'] == sf]
        used_target_idxs = subset_df['target_lf_idx'].unique()
        if len(used_target_idxs) == 1:
            dominant_sfs.add(sf)
        else:
            for lf_idx in used_target_idxs:
                used_sf_lf_map[sf].append(sf_lf_map[sf][lf_idx])

    prev_N = df.shape[0]
    df = df[~df['sf'].isin(dominant_sfs)]
    print('Removing {} examples from {} SFs because they have only 1 sense associated with'
          ' them after preprocessing'.format(prev_N - df.shape[0], len(dominant_sfs)))

    df['used_target_lf_idx'] = df['sf'].combine(df['target_lf'], lambda sf, lf: target_lf_index(lf, used_sf_lf_map[sf]))

    sf_tokenized_lf_map = {}
    for sf, lf_list in used_sf_lf_map.items():
        sf_tokenized_lf_map[sf] = list(map(lf_tokenizer, lf_list))

    train_df, test_df = train_test_split(df, random_state=1992, test_size=0.2)
    train_batcher = AcronymBatcherLoader(train_df, batch_size=args.batch_size)
    test_batcher = AcronymBatcherLoader(test_df, batch_size=args.batch_size)

    render_test_statistics(test_df, used_sf_lf_map)

    # Create model experiments directory or clear if it already exists
    weights_dir = os.path.join('../acronyms', 'weights', args.experiment)
    if os.path.exists(weights_dir):
        print('Clearing out previous weights in {}'.format(weights_dir))
        rmtree(weights_dir)
    os.mkdir(weights_dir)
    results_dir = os.path.join(weights_dir, 'results')
    os.mkdir(results_dir)
    os.mkdir(os.path.join(results_dir, 'confusion'))

    model = AcronymExpander(bsg_model)

    # Instantiate Adam optimizer
    trainable_params = filter(lambda x: x.requires_grad, model.parameters())
    optimizer = torch.optim.Adam(trainable_params, lr=args.lr)

    loss_func = nn.CrossEntropyLoss()
    best_weights = model.state_dict()
    best_epoch = 1
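    # Baseline test loss of the pre-trained BSG model, before any fine-tuning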
    lowest_test_loss = run_test_epoch(args, test_batcher, model, loss_func, vocab, sf_tokenized_lf_map)

    # Put the model in training mode (enables dropout/batch-norm updates);
    # gradient tracking itself comes from the parameters' requires_grad flags
    model.train()
    for epoch in range(1, args.epochs + 1):
        sleep(0.1)  # Make sure logging is synchronous with tqdm progress bar
        print('Starting Epoch={}'.format(epoch))

        train_loss = run_train_epoch(args, train_batcher, model, loss_func, optimizer, vocab, sf_tokenized_lf_map)
        test_loss = run_test_epoch(args, test_batcher, model, loss_func, vocab, sf_tokenized_lf_map)

        losses_dict = {
            'train': train_loss,
            'test_loss': test_loss
        }

        checkpoint_fp = os.path.join(weights_dir, 'checkpoint_{}.pth'.format(epoch))
        save_checkpoint(args, model, optimizer, vocab, losses_dict, checkpoint_fp=checkpoint_fp)

        if test_loss < lowest_test_loss:
            lowest_test_loss = test_loss
            best_weights = model.state_dict()
            best_epoch = epoch

        if args.debug:
            break
    print('Loading weights from epoch {} to perform error analysis'.format(best_epoch))
    model.load_state_dict(best_weights)
    losses_dict['test_loss'] = lowest_test_loss
    checkpoint_fp = os.path.join(weights_dir, 'checkpoint_best.pth')
    save_checkpoint(args, model, optimizer, vocab, losses_dict, checkpoint_fp=checkpoint_fp)
    error_analysis(test_batcher, model, used_sf_lf_map, loss_func, vocab, results_dir=results_dir)
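

# The fine-tuning routine above relies on a target_lf_index(lf, lf_list) helper defined elsewhere in
# the repo. A minimal sketch of the behavior implied by its usage (the position of the matching long
# form, or -1 when it is absent) might look like the following; the case-insensitive matching is an
# assumption, not the repo's actual implementation.
def _target_lf_index_sketch(lf, lf_list):
    """Return the index of `lf` in `lf_list`, or -1 if it is not present."""
    target = lf.strip().lower()
    for i, candidate in enumerate(lf_list):
        if str(candidate).strip().lower() == target:
            return i
    return -1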


# Example #2

    parser.add_argument('-combine_phrases', default=False, action='store_true')
    parser.add_argument('-section2vec', default=False, action='store_true')
    parser.add_argument('--epochs', default=4, type=int)
    parser.add_argument('--lr', default=0.001, type=float)
    parser.add_argument('--window', default=5, type=int)
    parser.add_argument('-use_pretrained', default=False, action='store_true')

    # Model Hyperparameters
    parser.add_argument('--encoder_hidden_dim', default=64, type=int, help='hidden dimension for encoder')
    parser.add_argument('--encoder_input_dim', default=64, type=int, help='embedding dimensions for encoder')
    parser.add_argument('--hinge_loss_margin', default=1.0, type=float, help='reconstruction margin')
    parser.add_argument('--latent_dim', default=100, type=int, help='z dimension')

    args = parser.parse_args()
    args.git_hash = get_git_revision_hash()
    render_args(args)

    # Load Data
    debug_str = '_mini' if args.debug else ''
    phrase_str = '_phrase' if args.combine_phrases else ''

    ids_infile = os.path.join(args.data_dir, 'ids{}{}.npy'.format(debug_str, phrase_str))
    print('Loading data from {}...'.format(ids_infile))
    with open(ids_infile, 'rb') as fd:
        ids = np.load(fd)

    # Load Vocabulary
    vocab_infile = '../preprocess/data/vocab{}{}.pk'.format(debug_str, phrase_str)
    print('Loading vocabulary from {}...'.format(vocab_infile))
    with open(vocab_infile, 'rb') as fd:
        vocab = pickle.load(fd)


# Example #3

def run_evaluation(args,
                   acronym_model,
                   dataset_loader,
                   restore_func,
                   train_frac=0.0):
    """
    :param args: argparse instance specifying evaluation configuration (including pre-trained model path)
    :param acronym_model: PyTorch model to rank candidate acronym expansions (an instance of model from ./modules/)
    :param dataset_loader: function to load acronym expansion dataset (i.e. either CASI or Reverse Substitution MIMIC)
    :param restore_func: Function to load pre-trained model weights (different for BSG and LMC)
    :param train_frac: If you want to fine tune the model, this should be about 0.8.
    Otherwise, default of 0.0 means the entire dataset is used as a test set for evaluation
    :return:
    """
    args.git_hash = get_git_revision_hash()
    render_args(args)

    if args.lm_type == 'bsg':
        prev_args, lm, token_vocab, _ = restore_func(args.lm_experiment,
                                                     ckpt=args.ckpt)
        metadata_vocab = None
        prev_args.metadata = None
    else:
        prev_args, lm, token_vocab, metadata_vocab, _, _, _ = restore_func(
            args.lm_experiment, ckpt=args.ckpt)
    train_batcher, test_batcher, train_df, test_df, sf_lf_map = dataset_loader(
        prev_args, train_frac=train_frac, batch_size=args.batch_size)
    args.metadata = prev_args.metadata

    # Construct smoothed empirical probabilities of metadata conditioned on LF ~ p(metadata|LF)
    lf_metadata_counts = extract_smoothed_metadata_probs(
        metadata=args.metadata)

    casi_dir = os.path.join(home_dir, 'shared_data', 'casi')
    canonical_lfs = pd.read_csv(os.path.join(casi_dir,
                                             'labeled_sf_lf_map.csv'))
    canonical_sf_lf_map = dict(
        canonical_lfs.groupby('target_lf_sense')['target_label'].apply(list))
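    # canonical_sf_lf_map maps each labeled LF sense to its canonical surface form(s);
    # the assert in the loop below checks that every sense has exactly one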

    sf_tokenized_lf_map = defaultdict(list)
    prev_vocab_size = token_vocab.size()
    for sf, lf_list in sf_lf_map.items():
        token_vocab.add_token(sf.lower())
        for lf in lf_list:
            canonical_lf_arr = list(set(canonical_sf_lf_map[lf]))
            assert len(canonical_lf_arr) == 1
            canonical_lf = canonical_lf_arr[0]
            tokens = lf_tokenizer(canonical_lf, token_vocab)
            sf_tokenized_lf_map[sf].append(tokens)
            for t in tokens:
                token_vocab.add_token(t)
    new_vocab_size = token_vocab.size()
    print('Added {} tokens to vocabulary from LF targets and SFs.'.format(
        new_vocab_size - prev_vocab_size))

    render_test_statistics(test_df, sf_lf_map)

    if lf_metadata_counts is not None:
        if args.dataset == 'mimic':
            train_lf_metadata_counts, val_lf_metadata_counts = split_marginals(
                lf_metadata_counts)
        else:
            train_lf_metadata_counts = lf_metadata_counts
            val_lf_metadata_counts = _generate_marginals(test_df)
        render_dominant_section_accuracy(train_lf_metadata_counts,
                                         val_lf_metadata_counts, sf_lf_map)

    # Create model experiments directory or clear if it already exists
    weights_dir = os.path.join(home_dir, 'weights', 'acronyms',
                               args.experiment)
    if os.path.exists(weights_dir):
        print('Clearing out previous weights in {}'.format(weights_dir))
        rmtree(weights_dir)
    os.mkdir(weights_dir)
    results_dir = os.path.join(weights_dir, 'results')
    os.mkdir(results_dir)
    os.mkdir(os.path.join(results_dir, 'confusion'))

    model = acronym_model(args, lm, token_vocab).to(args.device)

    # Instantiate Adam optimizer
    trainable_params = filter(lambda x: x.requires_grad, model.parameters())
    optimizer = torch.optim.Adam(trainable_params, lr=args.lr)

    loss_func = nn.CrossEntropyLoss()
    best_weights = model.state_dict()
    best_epoch = 1
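    # Score the restored (pre-trained) language model before any fine-tuning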
    lowest_test_loss, highest_test_acc = run_test_epoch(
        args, test_batcher, model, loss_func, token_vocab, metadata_vocab,
        sf_tokenized_lf_map, sf_lf_map, lf_metadata_counts)

    metrics = analyze(args,
                      test_batcher,
                      model,
                      sf_lf_map,
                      loss_func,
                      token_vocab,
                      metadata_vocab,
                      sf_tokenized_lf_map,
                      lf_metadata_counts,
                      results_dir=results_dir)
    metrics['log_loss'] = lowest_test_loss
    metrics['accuracy'] = highest_test_acc
    if args.epochs == 0:
        return metrics

    # Fine-tuning loop
    for epoch in range(1, args.epochs + 1):
        sleep(0.1)  # Make sure logging is synchronous with tqdm progress bar
        print('Starting Epoch={}'.format(epoch))
        _ = run_train_epoch(args, train_batcher, model, loss_func, optimizer,
                            token_vocab, metadata_vocab, sf_tokenized_lf_map,
                            sf_lf_map, lf_metadata_counts)
        test_loss, test_acc = run_test_epoch(args, test_batcher, model,
                                             loss_func, token_vocab,
                                             metadata_vocab,
                                             sf_tokenized_lf_map, sf_lf_map,
                                             lf_metadata_counts)
        analyze(args,
                test_batcher,
                model,
                sf_lf_map,
                loss_func,
                token_vocab,
                metadata_vocab,
                sf_tokenized_lf_map,
                lf_metadata_counts,
                results_dir=results_dir)

        lowest_test_loss = min(lowest_test_loss, test_loss)
        highest_test_acc = max(highest_test_acc, test_acc)
        if lowest_test_loss == test_loss:
            best_weights = model.state_dict()
            best_epoch = epoch
    print('Loading weights from epoch {} to perform error analysis'.format(
        best_epoch))
    model.load_state_dict(best_weights)
    metrics = analyze(args,
                      test_batcher,
                      model,
                      sf_lf_map,
                      loss_func,
                      token_vocab,
                      metadata_vocab,
                      sf_tokenized_lf_map,
                      lf_metadata_counts,
                      results_dir=results_dir)
    metrics['log_loss'] = lowest_test_loss
    metrics['accuracy'] = highest_test_acc
    return metrics
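

# A hypothetical invocation of run_evaluation, for illustration only. `restore_model` is the BSG restore
# helper used in the fine-tuning example above; `AcronymExpanderLM` and `load_casi_dataset` are placeholder
# names standing in for the repo's model class and dataset loader, which are not shown in these excerpts.
# The Namespace fields mirror the attributes that run_evaluation reads from `args`.
def _example_evaluation():
    from argparse import Namespace

    eval_args = Namespace(
        experiment='bsg_casi_eval',    # output directory under weights/acronyms/
        lm_type='bsg',                 # selects the 4-value BSG branch of restore_func
        lm_experiment='bsg_baseline',  # pre-trained language-model experiment to restore
        ckpt=None,                     # checkpoint identifier forwarded to restore_func
        dataset='casi',                # acronym expansion dataset to evaluate on (CASI or reverse-substitution MIMIC)
        batch_size=32,
        lr=0.001,
        epochs=0,                      # 0 -> score the pre-trained model only, no fine-tuning
        device='cpu',
    )
    return run_evaluation(eval_args, AcronymExpanderLM, load_casi_dataset, restore_model, train_frac=0.0)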