Example #1
    def __init__(self, opt, bert_config=None):
        super(SANBertNetwork, self).__init__()
        self.dropout_list = nn.ModuleList()
        self.bert_config = BertConfig.from_dict(opt)
        self.bert = BertModel(self.bert_config)
        if opt.get('dump_feature', False):
            self.opt = opt
            return
        if opt['update_bert_opt'] > 0:
            for p in self.bert.parameters():
                p.requires_grad = False
        mem_size = self.bert_config.hidden_size
        self.decoder_opt = opt['answer_opt']
        self.scoring_list = nn.ModuleList()
        labels = [int(ls) for ls in opt['label_size'].split(',')]
        task_dropout_p = opt['tasks_dropout_p']
        self.bert_pooler = None

        for task, lab in enumerate(labels):
            decoder_opt = self.decoder_opt[task]
            dropout = DropoutWrapper(task_dropout_p[task], opt['vb_dropout'])
            self.dropout_list.append(dropout)
            if decoder_opt == 1:
                out_proj = SANClassifier(mem_size, mem_size, lab, opt, prefix='answer', dropout=dropout)
                self.scoring_list.append(out_proj)
            else:
                out_proj = nn.Linear(self.bert_config.hidden_size, lab)
                self.scoring_list.append(out_proj)

        self.opt = opt
        self._my_init()
        self.set_embed(opt)
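
A minimal usage sketch for this constructor, assuming SANBertNetwork is importable from the surrounding module (in the upstream mt-dnn repository it lives in mt_dnn/matcher.py). The flat opt dict combines the standard pytorch_pretrained_bert BertConfig fields consumed by BertConfig.from_dict(opt) with the multi-task keys read above; the helper methods called at the end (_my_init, set_embed) may read further keys in some versions, so this is a sketch rather than a verified call.

opt = {
    # Standard BERT config fields consumed by BertConfig.from_dict(opt).
    "vocab_size": 30522, "hidden_size": 768, "num_hidden_layers": 12,
    "num_attention_heads": 12, "intermediate_size": 3072, "hidden_act": "gelu",
    "hidden_dropout_prob": 0.1, "attention_probs_dropout_prob": 0.1,
    "max_position_embeddings": 512, "type_vocab_size": 2, "initializer_range": 0.02,
    # Multi-task options read by __init__ above (two hypothetical tasks).
    "update_bert_opt": 0,            # > 0 freezes all BERT parameters
    "answer_opt": [0, 0],            # per-task decoder flag; 1 selects SANClassifier
    "label_size": "3,2",             # comma-separated number of labels per task
    "tasks_dropout_p": [0.1, 0.1],   # per-task dropout probabilities
    "vb_dropout": False,
    "init_ratio": 1,                 # assumed: read by _my_init() in some variants
}
model = SANBertNetwork(opt)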
Example #2
    def __init__(self, opt, bert_config=None):
        super(SANBertNetwork, self).__init__()
        self.dropout_list = nn.ModuleList()
        self.encoder_type = opt['encoder_type']
        if opt['encoder_type'] == EncoderModelType.ROBERTA:
            from fairseq.models.roberta import RobertaModel
            self.bert = RobertaModel.from_pretrained(opt['init_checkpoint'])
            hidden_size = self.bert.args.encoder_embed_dim
            self.pooler = LinearPooler(hidden_size)
        else:
            self.bert_config = BertConfig.from_dict(opt)
            self.bert = BertModel(self.bert_config)
            hidden_size = self.bert_config.hidden_size

        if opt.get('dump_feature', False):
            self.opt = opt
            return
        if opt['update_bert_opt'] > 0:
            for p in self.bert.parameters():
                p.requires_grad = False
        self.decoder_opt = opt['answer_opt']
        self.task_types = opt["task_types"]
        self.scoring_list = nn.ModuleList()
        labels = [int(ls) for ls in opt['label_size'].split(',')]
        task_dropout_p = opt['tasks_dropout_p']

        for task, lab in enumerate(labels):
            decoder_opt = self.decoder_opt[task]
            task_type = self.task_types[task]
            dropout = DropoutWrapper(task_dropout_p[task], opt['vb_dropout'])
            self.dropout_list.append(dropout)
            if task_type == TaskType.Span:
                assert decoder_opt != 1
                out_proj = nn.Linear(hidden_size, 2)
            elif task_type == TaskType.SeqenceLabeling:
                out_proj = nn.Linear(hidden_size, lab)
            elif task_type == TaskType.MaskLM:
                if opt['encoder_type'] == EncoderModelType.ROBERTA:
                    # TODO: xiaodl
                    out_proj = MaskLmHeader(
                        self.bert.embeddings.word_embeddings.weight)
                else:
                    out_proj = MaskLmHeader(
                        self.bert.embeddings.word_embeddings.weight)
            else:
                if decoder_opt == 1:
                    out_proj = SANClassifier(hidden_size,
                                             hidden_size,
                                             lab,
                                             opt,
                                             prefix='answer',
                                             dropout=dropout)
                else:
                    out_proj = nn.Linear(hidden_size, lab)
            self.scoring_list.append(out_proj)

        self.opt = opt
        self._my_init()
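
This variant adds an encoder selector and per-task task types on top of the Example #1 options. A hedged sketch of the extra keys, reusing the opt dict from the previous sketch; EncoderModelType.BERT is assumed to be the non-RoBERTa member of the enum whose ROBERTA member appears above, the checkpoint path is a placeholder, and the TaskType members are taken directly from the code.

opt.update({
    "encoder_type": EncoderModelType.BERT,   # EncoderModelType.ROBERTA switches to the fairseq branch
    "init_checkpoint": "path/to/roberta",    # only read by the ROBERTA branch
    "task_types": [TaskType.Span, TaskType.SeqenceLabeling],  # one entry per task
    "answer_opt": [0, 0],                    # Span tasks assert decoder_opt != 1
    "label_size": "2,9",
    "tasks_dropout_p": [0.1, 0.1],
})
model = SANBertNetwork(opt)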
Example #3
    def __init__(self, opt, bert_config=None,
                 use_parse=False, embedding_matrix=None, token2idx=None, stx_parse_dim=None, unked_words=None,
                 use_generic_features=False, num_generic_features=None, use_domain_features=False, num_domain_features=None, feature_dim=None):
        super(SANBertNetwork, self).__init__()
        self.dropout_list = []
        self.bert_config = BertConfig.from_dict(opt)
        self.bert = BertModel(self.bert_config)
        if opt['update_bert_opt'] > 0:
            for p in self.bert.parameters():
                p.requires_grad = False
        mem_size = self.bert_config.hidden_size
        self.scoring_list = nn.ModuleList()
        labels = [int(ls) for ls in opt['label_size'].split(',')]
        task_dropout_p = opt['tasks_dropout_p']
        self.bert_pooler = None

        self.use_parse = use_parse
        self.stx_parse_dim = stx_parse_dim
        self.use_generic_features = use_generic_features
        self.use_domain_features = use_domain_features

        clf_dim = self.bert_config.hidden_size
        if self.use_parse:
            self.treelstm = BinaryTreeLSTM(self.stx_parse_dim, embedding_matrix.clone(), token2idx, unked_words=unked_words)
            parse_clf_dim = self.stx_parse_dim * 2
            clf_dim += parse_clf_dim
            self.parse_clf = nn.Linear(parse_clf_dim, labels[0])
        if self.use_generic_features:
            self.generic_feature_proj = nn.Linear(num_generic_features, num_generic_features * feature_dim)
            generic_feature_clf_dim = num_generic_features * feature_dim
            clf_dim += generic_feature_clf_dim
            self.generic_feature_clf = nn.Linear(generic_feature_clf_dim, labels[0])
        if self.use_domain_features:
            self.domain_feature_proj = nn.Linear(num_domain_features, num_domain_features * feature_dim)
            domain_feature_clf_dim = num_domain_features * feature_dim
            clf_dim += domain_feature_clf_dim
            self.domain_feature_clf = nn.Linear(domain_feature_clf_dim, labels[0])

        assert len(labels) == 1
        for task, lab in enumerate(labels):
            dropout = DropoutWrapper(task_dropout_p[task], opt['vb_dropout'])
            self.dropout_list.append(dropout)
            out_proj = nn.Linear(self.bert_config.hidden_size, lab)
            self.scoring_list.append(out_proj)

        self.opt = opt
        self._my_init()
        self.set_embed(opt)
        if embedding_matrix is not None and self.use_parse:
            self.treelstm.embedding.weight = nn.Parameter(embedding_matrix)  # set again b/c self._my_init() overwrites it
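
A hedged sketch of how the parse- and feature-related keyword arguments added by this fork might be supplied, assuming an opt dict like the one in the Example #1 sketch but with a single task (the constructor asserts len(labels) == 1); the embedding matrix, vocabulary, and dimensions below are placeholders.

import torch

embedding_matrix = torch.randn(10000, 300)   # placeholder: vocab_size x embedding_dim
token2idx = {"<unk>": 0}                     # placeholder vocabulary mapping
model = SANBertNetwork(
    opt,                                     # single-task opt, e.g. label_size="3"
    use_parse=True,
    embedding_matrix=embedding_matrix,
    token2idx=token2idx,
    stx_parse_dim=100,
    unked_words=set(),
    use_generic_features=True, num_generic_features=12,
    use_domain_features=False, num_domain_features=None,
    feature_dim=8,
)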
Example #4
    def __init__(self,
                 args,
                 sample_datum,
                 class_names=None,
                 use_cuda=torch.cuda.is_available()):
        print("curr path:", args.run_dir)
        assert os.path.isdir(args.run_dir)
        if class_names is None: class_names = {}

        self.run_dir = args.run_dir

        device = torch.device("cuda" if use_cuda else "cpu")
        n_gpu = 0 if not use_cuda else torch.cuda.device_count()
        self.device = device
        self.n_gpu = n_gpu

        self.class_names = class_names
        task_weights = None
        task_class_weights = {
            'tasks_binary_multilabel': torch.ones(
                len(self.class_names['tasks_binary_multilabel'])
            ).to(self.device)
        }
        if args.task_weights_filepath:
            assert os.path.isfile(args.task_weights_filepath), \
                "Task weights file does not exist!"
            assert args.regression_task_weight == 1, "Can't set both regression task weight and file!"
            assert not args.ablate, "Can't both use a file and an ablation code."

            with open(args.task_weights_filepath, mode='r') as f:
                task_weights = json.loads(f.read())
        elif args.regression_task_weight != 1:
            assert not os.path.isfile(args.task_weights_filepath), \
                "Can't use both a file and a reg. weight!"
            assert not args.ablate, "Can't both use a reg. weight and an ablation code!"

            task_weights = {t: 1 for t in ALL_TASKS}
            task_weights['next_timepoint'] = args.regression_task_weight
        elif args.ablate:
            assert not os.path.isfile(args.task_weights_filepath), \
                "Can't use both an ablation and a file!"
            assert args.regression_task_weight == 1, "Can't set both regression task weight and ablation!"

            print("Ablating!")

            task_weights, task_class_weights = self.ablate(args.ablate,
                                                           post_init=False)
        else:
            task_weights, task_class_weights = self.ablate(None,
                                                           post_init=False)

        self.add_cls_analog = False
        if args.do_add_cls_analog:
            assert args.modeltype.lower() not in (
                'cnn', 'gru', 'linear'), "CLS analog only works w/ BERT"
            self.add_cls_analog = True
            self.cls_embed = nn.Parameter(
                data=torch.randn(1, 1, args.hidden_size), requires_grad=True)
        else:
            self.cls_embed = None

        # No batch size as this is just accessed via dataset[#].
        ts_feat_dim = sample_datum['ts'].shape[1]
        statics_feat_dim = sample_datum['statics'].shape[0]
        pred_dim = sample_datum['next_timepoint'].shape

        config = {
            "attention_probs_dropout_prob": 0.1,
            "hidden_act": "gelu",
            "hidden_dropout_prob": args.hidden_dropout_prob,
            "pred_dim": pred_dim,
            "hidden_size": args.hidden_size,
            "initializer_range": 0.02,
            "intermediate_size": args.intermediate_size,
            "max_position_embeddings": args.max_seq_len + 1 if self.add_cls_analog else args.max_seq_len,
            "num_attention_heads": args.num_attention_heads,
            "num_hidden_layers": args.num_hidden_layers,
            "type_vocab_size": 2,
            "vocab_size": None,  # TODO(mmd): Omit this from config...
        }
        bert_config = BertConfig.from_dict(config)
        bert_config_filepath = os.path.join(args.run_dir, CONFIG_FILENAME)

        if not os.path.isfile(bert_config_filepath) or args.do_overwrite:
            bert_config.to_json_file(
                os.path.join(args.run_dir, 'bert_config.json'))

        # default arg is self attention timeseries

        # alternative is CNN
        if args.modeltype.lower() == 'cnn':
            model = CNN(
                bert_config,
                data_shape=[args.max_seq_len, args.hidden_size],
                use_cuda=torch.cuda.is_available(),
                conv_layers=list(args.num_filters),
                kernel_sizes=list(args.kernel_sizes),
                fc_layer_sizes=list(args.fc_layer_sizes),
                pooling_method=args.pooling_method,
                pooling_kernel_size=args.pooling_kernel_size,
                pooling_stride=args.pooling_stride,
                conv_layers_per_pool=args.conv_layers_per_pool,
                task_weights=task_weights,
                task_class_weights=task_class_weights,
            )
        elif args.modeltype.lower() == 'gru':
            model = GRUModel(
                bert_config,
                data_shape=[args.max_seq_len, args.hidden_size],
                use_cuda=torch.cuda.is_available(),
                hidden_dim=args.gru_hidden_layer_size,
                num_layers=args.gru_num_hidden,
                bidirectional=args.do_bidirectional,
                task_weights=task_weights,
                pooling_method=args.gru_pooling_method,
                task_class_weights=task_class_weights,
            )
        elif args.modeltype.lower() == 'linear':
            model = LinearModel(
                bert_config,
                data_shape=[args.max_seq_len, args.hidden_size],
                use_cuda=torch.cuda.is_available(),
                task_weights=task_weights,
                task_class_weights=task_class_weights,
            )
        else:
            model = SelfAttentionTimeseries(
                bert_config,
                use_cuda=torch.cuda.is_available(),
                task_weights=task_weights,
                task_class_weights=task_class_weights,
            )

        # TODO(mmd): Need to also load ts_projector.
        ts_projector = nn.Linear(ts_feat_dim, bert_config.hidden_size)
        statics_projector = nn.Linear(statics_feat_dim,
                                      bert_config.hidden_size)

        model.apply(weight_init)

        for m in (model, ts_projector, statics_projector, self.cls_embed):
            if m is None: continue
            m.to(device)
            if n_gpu > 1: m = torch.nn.DataParallel(m).cuda()

        parameters = (list(model.parameters()) +
                      list(ts_projector.parameters()) +
                      list(statics_projector.parameters()))
        if self.add_cls_analog: parameters += [self.cls_embed]

        if args.notes == 'integrate_note_bert':
            # initialize pretrained clinical note bert model
            cache_dir = os.path.join(PYTORCH_PRETRAINED_BERT_CACHE,
                                     'distributed_-1')

            model_location = BERT_MODEL_LOCATION

            note_embedding_model = BertModel.from_pretrained(
                model_location,
                cache_dir=cache_dir,
            )
            notes_projector = nn.Linear(768, bert_config.hidden_size)

            note_embedding_model.to(device)
            notes_projector.to(device)

            if n_gpu > 1:
                note_embedding_model = torch.nn.DataParallel(
                    note_embedding_model,
                    device_ids=list(range(torch.cuda.device_count()))).cuda()
                notes_projector = torch.nn.DataParallel(
                    notes_projector,
                    device_ids=list(range(torch.cuda.device_count()))).cuda()

            if args.do_train_note_bert:
                parameters = [
                    {
                        "params": parameters,
                        "lr": args.learning_rate
                    },
                    {
                        "params": note_embedding_model.parameters(),
                        "lr": args.learning_rate / args.note_bert_lr_reduce
                    },
                    {
                        "params": notes_projector.parameters(),
                        "lr": args.learning_rate / args.note_bert_lr_reduce
                    },
                ]
        else:
            note_embedding_model = None
            notes_projector = None

        self.bert_config = bert_config
        self.model = model
        self.ts_projector = ts_projector
        self.statics_projector = statics_projector
        self.notes_projector = notes_projector
        self.note_embedding_model = note_embedding_model
        self.parameters = parameters

        self.trainable_models = [
            self.model, self.ts_projector, self.statics_projector
        ]
        if args.do_train_note_bert:
            self.trainable_models.extend(
                [self.notes_projector, self.note_embedding_model])

        self.n_gpu = n_gpu
        self.device = device
        self.notes = args.notes

        self.run_dir = args.run_dir
        self.save_name = args.model_file_template.format(**args.to_dict())
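
A minimal sketch of the inputs this constructor expects: sample_datum only needs to expose the shapes read above, and args is an argparse-style namespace. The field names are the ones accessed in the code; the concrete shapes and the attribute list are assumptions for illustration.

import numpy as np

# Hypothetical sample_datum: 'ts' is (seq_len, ts_feat_dim), 'statics' is
# (statics_feat_dim,), and 'next_timepoint' supplies pred_dim via its shape.
sample_datum = {
    "ts": np.zeros((48, 56), dtype=np.float32),
    "statics": np.zeros(15, dtype=np.float32),
    "next_timepoint": np.zeros(56, dtype=np.float32),
}

# args is assumed to expose at least: run_dir, task_weights_filepath,
# regression_task_weight, ablate, do_add_cls_analog, hidden_dropout_prob,
# hidden_size, intermediate_size, max_seq_len, num_attention_heads,
# num_hidden_layers, do_overwrite, modeltype, notes, do_train_note_bert,
# learning_rate, note_bert_lr_reduce, model_file_template, and a to_dict()
# method, plus the CNN/GRU hyperparameters for the chosen modeltype.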
Example #5
    def __init__(self,
                 args,
                 sample_datum,
                 class_names=None,
                 verbose=False,
                 use_cuda=torch.cuda.is_available()):
        print("curr path:", args.run_dir)
        assert os.path.isdir(args.run_dir)
        if class_names is None: class_names = {}

        self.run_dir = args.run_dir
        self.do_eicu = args.do_eicu

        self.debug = False

        device = torch.device("cuda" if use_cuda else "cpu")
        n_gpu = 0 if not use_cuda else torch.cuda.device_count()
        self.device = device
        self.n_gpu = n_gpu

        self.do_masked_imputation = args.do_masked_imputation
        self.do_fake_masked_imputation_shape = args.do_fake_masked_imputation_shape
        assert not (self.do_fake_masked_imputation_shape and self.do_masked_imputation), \
            "Can't fake and mask!"

        self.class_names = class_names
        task_weights = None
        task_class_weights = {
            'tasks_binary_multilabel': torch.ones(
                len(self.class_names['tasks_binary_multilabel'])
            ).to(self.device)
        }

        # We can handle zeroing out the regression task through the ablation interface.
        if args.regression_task_weight in (0, None):
            if args.ablate:
                if isinstance(args.ablate, str): args.ablate = [args.ablate]
                if 'next_timepoint' not in args.ablate:
                    args.ablate.append('next_timepoint')
            else:
                args.ablate = ['next_timepoint']

        if args.task_weights_filepath:
            print("filepath")
            assert os.path.isfile(args.task_weights_filepath), \
                "Task weights file does not exist!"
            assert args.regression_task_weight == 1, "Can't set both regression task weight and file!"
            assert not args.ablate, "Can't both use a file and an ablation code."

            with open(args.task_weights_filepath, mode='r') as f:
                task_weights = json.loads(f.read())
        elif args.regression_task_weight not in (1, 0, None):
            print("regression weight")
            assert not os.path.isfile(args.task_weights_filepath), \
                "Can't use both a file and a reg. weight!"
            # assert not args.ablate, "Can't both use a reg. weight and an ablation code!" # this is commented out because now, by default regression is turned off.

            task_weights = {
                t: 1 if t in ALL_TASKS_EICU or not self.do_eicu else 0
                for t in ALL_TASKS
            }
            task_weights['next_timepoint'] = args.regression_task_weight
        elif args.ablate:
            assert not os.path.isfile(args.task_weights_filepath), \
                "Can't use both an ablation and a file!"
            assert args.regression_task_weight in (None, 0, 1), \
                "Can't set both regression task weight and ablation!"
            if args.regression_task_weight in (None, 0):
                assert 'next_timepoint' in args.ablate or 'next_timepoint_info' in args.ablate, \
                    "Must ablate the regression task with a weighting of 0! Should happen automatically."

            print("Ablating!")

            task_weights, task_class_weights = self.ablate(args.ablate,
                                                           post_init=False)
        else:
            print("else")
            task_weights, task_class_weights = self.ablate(None,
                                                           post_init=False)

        self.add_cls_analog = False
        if args.do_add_cls_analog:
            assert args.modeltype.lower() not in (
                'cnn', 'gru', 'linear'), "CLS analog only works w/ BERT"
            self.add_cls_analog = True
            self.cls_embed = nn.Parameter(
                data=torch.randn(1, 1, args.hidden_size), requires_grad=True)
        else:
            self.cls_embed = None

        # No batch size as this is just accessed via dataset[#].
        ts_feat_dim = sample_datum['ts'].shape[1]
        statics_feat_dim = sample_datum['statics'].shape[0]
        pred_dim = sample_datum['next_timepoint'].shape

        # For the is-masked bit.
        if self.do_masked_imputation or self.do_fake_masked_imputation_shape:
            ts_feat_dim += 1

        config = {
            "attention_probs_dropout_prob": 0.1,
            "hidden_act": "gelu",
            "hidden_dropout_prob": args.hidden_dropout_prob,
            "pred_dim": pred_dim,
            "hidden_size": args.hidden_size,
            "initializer_range": 0.02,
            "intermediate_size": args.intermediate_size,
            "max_position_embeddings": args.max_seq_len + 1 if self.add_cls_analog else args.max_seq_len,
            "num_attention_heads": args.num_attention_heads,
            "num_hidden_layers": args.num_hidden_layers,
            "type_vocab_size": 2,
            "vocab_size": None,  # TODO(mmd): Omit this from config...
        }
        bert_config = BertConfig.from_dict(config)
        bert_config_filepath = os.path.join(args.run_dir, CONFIG_FILENAME)

        if not os.path.isfile(bert_config_filepath) or args.do_overwrite:
            bert_config.to_json_file(
                os.path.join(args.run_dir, 'bert_config.json'))

        # default arg is self attention timeseries

        # alternative is CNN
        assert args.modeltype.lower() == 'gru', \
            "Only GRU is supported in this version."
        model = GRUModel(
            bert_config,
            data_shape=[args.max_seq_len, args.hidden_size],
            use_cuda=torch.cuda.is_available(),
            hidden_dim=args.gru_hidden_layer_size,
            num_layers=args.gru_num_hidden,
            bidirectional=args.do_bidirectional,
            task_weights=task_weights,
            pooling_method=args.gru_pooling_method,
            task_class_weights=task_class_weights,
            verbose=verbose,
            do_eicu=self.do_eicu,
        )

        # TODO(mmd): Need to also load ts_projector.
        ts_projector = nn.Linear(ts_feat_dim, bert_config.hidden_size)
        statics_projector = nn.Linear(statics_feat_dim,
                                      bert_config.hidden_size)

        model.apply(weight_init)

        for m in (model, ts_projector, statics_projector, self.cls_embed):
            if m is None: continue
            m.to(device)
            if n_gpu > 1: m = torch.nn.DataParallel(m).cuda()

        parameters = (list(model.parameters()) +
                      list(ts_projector.parameters()) +
                      list(statics_projector.parameters()))
        if self.add_cls_analog: parameters += [self.cls_embed]

        if args.notes == 'integrate_note_bert':
            # initialize pretrained clinical note bert model
            cache_dir = os.path.join(PYTORCH_PRETRAINED_BERT_CACHE,
                                     'distributed_-1')

            model_location = BERT_MODEL_LOCATION

            note_embedding_model = BertModel.from_pretrained(
                model_location,
                cache_dir=cache_dir,
            )
            notes_projector = nn.Linear(768, bert_config.hidden_size)

            note_embedding_model.to(device)
            notes_projector.to(device)

            if n_gpu > 1:
                note_embedding_model = torch.nn.DataParallel(
                    note_embedding_model,
                    device_ids=list(range(torch.cuda.device_count()))).cuda()
                notes_projector = torch.nn.DataParallel(
                    notes_projector,
                    device_ids=list(range(torch.cuda.device_count()))).cuda()

            if args.do_train_note_bert:
                parameters = [
                    {
                        "params": parameters,
                        "lr": args.learning_rate
                    },
                    {
                        "params": note_embedding_model.parameters(),
                        "lr": args.learning_rate / args.note_bert_lr_reduce
                    },
                    {
                        "params": notes_projector.parameters(),
                        "lr": args.learning_rate / args.note_bert_lr_reduce
                    },
                ]
        else:
            note_embedding_model = None
            notes_projector = None

        self.bert_config = bert_config
        self.model = model
        self.ts_projector = ts_projector
        self.statics_projector = statics_projector
        self.notes_projector = notes_projector
        self.note_embedding_model = note_embedding_model
        self.parameters = parameters

        self.trainable_models = [
            self.model, self.ts_projector, self.statics_projector
        ]
        if args.do_train_note_bert:
            self.trainable_models.extend(
                [self.notes_projector, self.note_embedding_model])

        self.n_gpu = n_gpu
        self.device = device
        self.notes = args.notes

        self.run_dir = args.run_dir
        self.save_name = args.model_file_template.format(**args.to_dict())
Example #6
def create_task(args):
    task_name = "TACRED"

    bert_model = BertModel.from_pretrained(args.bert_model, cache_dir="./cache/")

    bert_output_dim = 768 if "base" in args.bert_model else 1024

    config = ENT_BERT_ENCODER_CONFIG
    if (
        args.ent_emb_file is not None
        or args.static_ent_emb_file is not None
        or args.type_emb_file is not None
        or args.rel_emb_file is not None
    ):
        config["num_hidden_layers"] = args.kg_encoder_layer
        output_size = ENT_BERT_ENCODER_CONFIG["hidden_size"]
    else:
        output_size = bert_output_dim
        ENT_BERT_ENCODER_CONFIG["hidden_size"] = output_size
    config = BertConfig.from_dict(config)
    logger.info(config)
    encoder = EntBertEncoder(
        config,
        bert_output_dim,
        output_size,
        args.ent_emb_file,
        args.static_ent_emb_file,
        args.type_emb_file,
        args.rel_emb_file,
        tanh=args.tanh,
        norm=args.norm,
    )

    task = EmmentalTask(
        name=task_name,
        module_pool=nn.ModuleDict(
            {
                "bert": bert_model,
                "encoder": encoder,
                f"{task_name}_pred_head": nn.Linear(
                    output_size, len(LABEL_TO_ID.keys())
                ),
            }
        ),
        task_flow=[
            {
                "name": "bert",
                "module": "bert",
                "inputs": [
                    ("_input_", "token_ids"),
                    ("_input_", "token_segments"),
                    ("_input_", "token_masks"),
                ],
            },
            {
                "name": "encoder",
                "module": "encoder",
                "inputs": [
                    ("bert", 0),
                    ("_input_", "token_ent_ids"),
                    ("_input_", "token_static_ent_ids"),
                    ("_input_", "token_type_ent_ids"),
                    ("_input_", "token_rel_ent_ids"),
                    ("_input_", "token_masks"),
                ],
            },
            {
                "name": f"{task_name}_pred_head",
                "module": f"{task_name}_pred_head",
                "inputs": [("encoder", 1)],
            },
        ],
        loss_func=partial(ce_loss, f"{task_name}_pred_head"),
        output_func=partial(output, f"{task_name}_pred_head"),
        scorer=Scorer(customize_metric_funcs={"tacred_scorer": tacred_scorer}),
    )

    return task
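
A hedged sketch of driving create_task, assuming an argparse-style args object with the attributes read above; with all embedding files set to None the encoder keeps BERT's output width. Registering the returned task with an EmmentalModel follows Emmental's usual pattern and is shown here as an assumption rather than part of this snippet.

import emmental
from argparse import Namespace
from emmental.model import EmmentalModel  # assumed Emmental import paths

# Hypothetical args covering every attribute create_task() reads above.
args = Namespace(
    bert_model="bert-base-cased",   # "base" in the name selects a 768-dim encoder
    ent_emb_file=None, static_ent_emb_file=None,
    type_emb_file=None, rel_emb_file=None,   # all None: output_size stays at bert_output_dim
    kg_encoder_layer=2, tanh=False, norm=False,
)

emmental.init()  # assumed bootstrap; real scripts usually pass a log dir and config
task = create_task(args)
model = EmmentalModel(name="TACRED", tasks=[task])  # then trained with an EmmentalLearner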