Example No. 1
    def __init__(self):
        super(AttBasicModel, self).__init__()
        self.ss_prob = 0.0  # Schedule sampling probability
        self.vocab_size = cfg.MODEL.VOCAB_SIZE + 1  # include <BOS>/<EOS>
        self.att_dim = cfg.MODEL.ATT_FEATS_EMBED_DIM \
            if cfg.MODEL.ATT_FEATS_EMBED_DIM > 0 else cfg.MODEL.ATT_FEATS_DIM

        # word embed
        sequential = [nn.Embedding(self.vocab_size, cfg.MODEL.WORD_EMBED_DIM)]
        sequential.append(utils.activation(cfg.MODEL.WORD_EMBED_ACT))
        if cfg.MODEL.WORD_EMBED_NORM:
            sequential.append(nn.LayerNorm(cfg.MODEL.WORD_EMBED_DIM))
        if cfg.MODEL.DROPOUT_WORD_EMBED > 0:
            sequential.append(nn.Dropout(cfg.MODEL.DROPOUT_WORD_EMBED))
        self.word_embed = nn.Sequential(*sequential)

        # global visual feat embed
        sequential = []
        if cfg.MODEL.GVFEAT_EMBED_DIM > 0:
            sequential.append(
                nn.Linear(cfg.MODEL.GVFEAT_DIM, cfg.MODEL.GVFEAT_EMBED_DIM))
        sequential.append(utils.activation(cfg.MODEL.GVFEAT_EMBED_ACT))
        if cfg.MODEL.DROPOUT_GV_EMBED > 0:
            sequential.append(nn.Dropout(cfg.MODEL.DROPOUT_GV_EMBED))
        self.gv_feat_embed = nn.Sequential(
            *sequential) if len(sequential) > 0 else None

        # attention feats embed
        sequential = []
        if cfg.MODEL.ATT_FEATS_EMBED_DIM > 0:
            sequential.append(
                nn.Linear(cfg.MODEL.ATT_FEATS_DIM,
                          cfg.MODEL.ATT_FEATS_EMBED_DIM))
        sequential.append(utils.activation(cfg.MODEL.ATT_FEATS_EMBED_ACT))
        if cfg.MODEL.DROPOUT_ATT_EMBED > 0:
            sequential.append(nn.Dropout(cfg.MODEL.DROPOUT_ATT_EMBED))
        if cfg.MODEL.ATT_FEATS_NORM:
            sequential.append(nn.LayerNorm(cfg.MODEL.ATT_FEATS_EMBED_DIM))
        self.att_embed = nn.Sequential(
            *sequential) if len(sequential) > 0 else None

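        # language-model dropout, output projection over the vocabulary, and an
        # optional projection of the attention features to the attention hidden size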
        self.dropout_lm = nn.Dropout(
            cfg.MODEL.DROPOUT_LM) if cfg.MODEL.DROPOUT_LM > 0 else None
        self.logit = nn.Linear(cfg.MODEL.RNN_SIZE, self.vocab_size)
        self.p_att_feats = nn.Linear(self.att_dim, cfg.MODEL.ATT_HIDDEN_SIZE) \
            if cfg.MODEL.ATT_HIDDEN_SIZE > 0 else None

        # bilinear
        if cfg.MODEL.BILINEAR.DIM > 0:
            self.p_att_feats = None
            self.encoder_layers = blocks.create(
                cfg.MODEL.BILINEAR.ENCODE_BLOCK,
                embed_dim=cfg.MODEL.BILINEAR.DIM,
                att_type=cfg.MODEL.BILINEAR.ATTTYPE,
                att_heads=cfg.MODEL.BILINEAR.HEAD,
                att_mid_dim=cfg.MODEL.BILINEAR.ENCODE_ATT_MID_DIM,
                att_mid_drop=cfg.MODEL.BILINEAR.ENCODE_ATT_MID_DROPOUT,
                dropout=cfg.MODEL.BILINEAR.ENCODE_DROPOUT,
                layer_num=cfg.MODEL.BILINEAR.ENCODE_LAYERS)
Example No. 2
    def __init__(self, embed_dim, att_type, att_heads, att_mid_dim,
                 att_mid_drop, dropout, layer_num):
        super(LowRankBilinearEncBlock, self).__init__()

        self.layers = nn.ModuleList([])
        self.bifeat_emb = nn.ModuleList([])
        self.layer_norms = nn.ModuleList([])
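        # each layer holds a low-rank bilinear attention sublayer, a bi-feature
        # embedding (2 * embed_dim -> embed_dim with activation and dropout),
        # and a LayerNorm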
        for _ in range(layer_num):
            sublayer = LowRankBilinearLayer(embed_dim=embed_dim,
                                            att_type=att_type,
                                            att_heads=att_heads,
                                            att_mid_dim=att_mid_dim,
                                            att_mid_drop=att_mid_drop,
                                            dropout=dropout)
            self.layers.append(sublayer)

            self.bifeat_emb.append(
                nn.Sequential(
                    nn.Linear(2 * embed_dim, embed_dim),
                    utils.activation(cfg.MODEL.BILINEAR.BIFEAT_EMB_ACT),
                    nn.Dropout(cfg.MODEL.BILINEAR.ENCODE_BIFEAT_EMB_DROPOUT)))

            self.layer_norms.append(torch.nn.LayerNorm(embed_dim))

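        # projects the (layer_num + 1) concatenated feature sets to embed_dim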
        self.proj = nn.Linear(embed_dim * (layer_num + 1), embed_dim)
        self.layer_norm = torch.nn.LayerNorm(cfg.MODEL.BILINEAR.DIM)
Example No. 3
    def __init__(self, args, submodel):
        super(DWEXTransformer, self).__init__()
        self.vocab_size = cfg.MODEL.VOCAB_SIZE + 1
        # image pretrained
        # self.image_pretrained_models, self.input_visual_feats = ImageClassification.image_features(
        #     'densenet', fixed_weight=False, pretrained_model=cfg.MODEL.PretrainedImageModel)
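        # pick the visual-feature forward pass and the number of images per
        # sample according to the dataset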
        if args.dataset_name == 'IUXRAY':
            num_images = 2
            self.get_visual_features = self.forward_iuxray
        elif args.dataset_name == 'MIMICCXR':
            num_images = 1
            self.get_visual_features = self.forward_mimiccxr
        elif args.dataset_name == 'MIMICCXR_MultiImages':
            num_images = 2
            self.get_visual_features = self.forward_mimiccxr

        # att_feats encoder
        cnn_sequential = []
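        # 1024-d visual features per image (presumably the pooled DenseNet
        # features referenced in the commented-out loader above)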
        self.input_visual_feats = 1024
        cnn_sequential.append(
            nn.Linear(self.input_visual_feats * num_images,
                      cfg.MODEL.ATT_FEATS_EMBED_DIM))
        cnn_sequential.append(utils.activation(cfg.MODEL.ATT_FEATS_EMBED_ACT))
        if cfg.MODEL.ATT_FEATS_NORM:
            cnn_sequential.append(nn.LayerNorm(cfg.MODEL.ATT_FEATS_EMBED_DIM))
        if cfg.MODEL.DROPOUT_ATT_EMBED > 0:
            cnn_sequential.append(nn.Dropout(cfg.MODEL.DROPOUT_ATT_EMBED))

        gcn_sequential = copy.deepcopy(cnn_sequential)

        self.cnn_embed = nn.Sequential(
            *cnn_sequential) if len(cnn_sequential) > 0 else None
        self.gcn_embed = nn.Sequential(
            *gcn_sequential) if len(gcn_sequential) > 0 else None

        self.encoder = Encoder(
            embed_dim=cfg.MODEL.BILINEAR.DIM,
            dropout=cfg.MODEL.BILINEAR.ENCODE_DROPOUT,
            att_type=cfg.MODEL.BILINEAR.ATTTYPE,
            att_heads=cfg.MODEL.BILINEAR.HEAD,
            att_mid_dim=cfg.MODEL.BILINEAR.ENCODE_ATT_MID_DIM,
            att_mid_drop=cfg.MODEL.BILINEAR.ENCODE_ATT_MID_DROPOUT,
            bifeat_emb_act=cfg.MODEL.BILINEAR.BIFEAT_EMB_ACT,
            bifeat_emb_drop=cfg.MODEL.BILINEAR.ENCODE_BIFEAT_EMB_DROPOUT,
            ff_dropout=cfg.MODEL.BILINEAR.ENCODE_FF_DROPOUT,
            layer_num=cfg.MODEL.BILINEAR.ENCODE_LAYERS)

        self.decoder = Decoder(
            vocab_size=self.vocab_size,
            embed_dim=cfg.MODEL.BILINEAR.DIM,
            dropout=cfg.MODEL.BILINEAR.DECODE_DROPOUT,
            att_type=cfg.MODEL.BILINEAR.ATTTYPE,
            att_heads=cfg.MODEL.BILINEAR.HEAD,
            att_mid_dim=cfg.MODEL.BILINEAR.DECODE_ATT_MID_DIM,
            att_mid_drop=cfg.MODEL.BILINEAR.DECODE_ATT_MID_DROPOUT,
            bifeat_emb_act=cfg.MODEL.BILINEAR.BIFEAT_EMB_ACT,
            bifeat_emb_drop=cfg.MODEL.BILINEAR.DECODE_BIFEAT_EMB_DROPOUT,
            ff_dropout=cfg.MODEL.BILINEAR.DECODE_FF_DROPOUT,
            layer_num=cfg.MODEL.BILINEAR.DECODE_LAYERS)
        self.submodel = submodel
Example No. 4
    def __init__(self, embed_dim, dropout, att_type, att_heads, att_mid_dim,
                 att_mid_drop, bifeat_emb_act, bifeat_emb_drop, ff_dropout):
        super(EncoderLayer, self).__init__()
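        # low-rank bilinear attention, a bi-feature embedding that fuses the
        # attended and original features (2 * embed_dim -> embed_dim), and a
        # position-wise feed-forward block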
        self.encoder_attn = LowRank(
            embed_dim=embed_dim,
            att_type=att_type,
            att_heads=att_heads,
            att_mid_dim=att_mid_dim,
            att_mid_drop=att_mid_drop)
        self.dropout = nn.Dropout(dropout)

        self.bifeat_emb = nn.Sequential(
            nn.Linear(2 * embed_dim, embed_dim),
            utils.activation(bifeat_emb_act),
            nn.Dropout(bifeat_emb_drop)
        )
        self.layer_norm = torch.nn.LayerNorm(embed_dim)

        self.ff_layer = blocks.create(
            'FeedForward',
            embed_dim=embed_dim,
            ffn_embed_dim=embed_dim * 4,
            relu_dropout=ff_dropout,
            dropout=ff_dropout)
Example No. 5
    def __init__(self, vocab_size, embed_dim, dropout, att_type, att_heads,
                 att_mid_dim, att_mid_drop, bifeat_emb_act, bifeat_emb_drop,
                 ff_dropout, layer_num):
        super(Decoder, self).__init__()
        self.att_heads = att_heads
        self.layers = nn.ModuleList([])
        self.embed_dim = embed_dim
        for i in range(layer_num):
            sublayer = DecoderLayer(embed_dim=embed_dim,
                                    dropout=dropout,
                                    att_type=att_type,
                                    att_heads=att_heads,
                                    att_mid_dim=att_mid_dim,
                                    att_mid_drop=att_mid_drop,
                                    bifeat_emb_act=bifeat_emb_act,
                                    bifeat_emb_drop=bifeat_emb_drop,
                                    ff_dropout=ff_dropout,
                                    last_layer=(i == layer_num - 1))
            self.layers.append(sublayer)

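        # scaled token embeddings, positional encoding, word-embedding dropout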
        self.dropout = nn.Dropout(cfg.MODEL.DROPOUT_WORD_EMBED)
        self.embed_tokens = nn.Embedding(vocab_size, embed_dim)
        self.embed_scale = math.sqrt(embed_dim)
        self.embed_positions = PositionalEncoding(
            embed_dim, cfg.MODEL.TRANSFORMER.PE_MAX_LEN)

        self.layer_norm_word = torch.nn.LayerNorm(embed_dim)
        self.generator = nn.Linear(embed_dim, vocab_size)

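        # wbil1/wbil2: Linear -> activation -> LayerNorm branches used for the
        # bilinear interaction, plus its dropout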
        self.wbil1 = nn.Sequential(nn.Linear(embed_dim, embed_dim),
                                   utils.activation(cfg.MODEL.BILINEAR.ACT),
                                   torch.nn.LayerNorm(embed_dim))
        self.wbil2 = nn.Sequential(nn.Linear(embed_dim, embed_dim),
                                   utils.activation(cfg.MODEL.BILINEAR.ACT),
                                   torch.nn.LayerNorm(embed_dim))
        self.wbi_drop = nn.Dropout(cfg.MODEL.BILINEAR.DECODE_DROPOUT)
        self.dropout_lm = nn.Dropout(cfg.MODEL.DROPOUT_LM)

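        # fuses the (layer_num + 1) concatenated features via a GLU projection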
        self.proj_norm = nn.Sequential(
            nn.Linear(embed_dim * (layer_num + 1), 2 * embed_dim), nn.GLU(),
            torch.nn.LayerNorm(embed_dim))

        self.clear_buffer()
Example No. 6
    def __init__(self, embed_dim, att_type, att_heads, att_mid_dim,
                 att_mid_drop):
        super(LowRank, self).__init__()
        self.embed_dim = embed_dim
        self.num_heads = att_heads
        self.head_dim = embed_dim // self.num_heads
        self.scaling = self.head_dim**-0.5
        output_dim = 2 * embed_dim if cfg.MODEL.BILINEAR.ACT == 'GLU' else embed_dim

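        # four identical input projections (query, key and two value branches):
        # Linear -> optional activation -> GroupNorm with one group per head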
        sequential = []
        sequential.append(nn.Linear(embed_dim, output_dim))
        act = utils.activation(cfg.MODEL.BILINEAR.ACT)
        if act is not None:
            sequential.append(act)
        sequential.append(torch.nn.GroupNorm(self.num_heads, embed_dim))
        self.in_proj_q = nn.Sequential(*sequential)

        sequential = []
        sequential.append(nn.Linear(embed_dim, output_dim))
        act = utils.activation(cfg.MODEL.BILINEAR.ACT)
        if act is not None:
            sequential.append(act)
        sequential.append(torch.nn.GroupNorm(self.num_heads, embed_dim))
        self.in_proj_k = nn.Sequential(*sequential)

        sequential = []
        sequential.append(nn.Linear(embed_dim, output_dim))
        act = utils.activation(cfg.MODEL.BILINEAR.ACT)
        if act is not None:
            sequential.append(act)
        sequential.append(torch.nn.GroupNorm(self.num_heads, embed_dim))
        self.in_proj_v1 = nn.Sequential(*sequential)

        sequential = []
        sequential.append(nn.Linear(embed_dim, output_dim))
        act = utils.activation(cfg.MODEL.BILINEAR.ACT)
        if act is not None:
            sequential.append(act)
        sequential.append(torch.nn.GroupNorm(self.num_heads, embed_dim))
        self.in_proj_v2 = nn.Sequential(*sequential)

        self.attn_net = layers.create(att_type, att_mid_dim, att_mid_drop)
        self.clear_buffer()
Example No. 7
    def __init__(self, embed_dim, dropout, att_type, att_heads, att_mid_dim,
                 att_mid_drop, bifeat_emb_act, bifeat_emb_drop, ff_dropout,
                 last_layer=False):
        super(DecoderLayer, self).__init__()
        self.last_layer = last_layer
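        # low-rank bilinear attention over the word features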
        self.word_attn = LowRank(
            embed_dim=embed_dim,
            att_type=att_type,
            att_heads=att_heads,
            att_mid_dim=att_mid_dim,
            att_mid_drop=att_mid_drop)
        self.word_dropout = nn.Dropout(dropout)

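        # low-rank bilinear cross-attention, with dropout and LayerNorm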
        self.cross_att = LowRank(
            embed_dim=embed_dim,
            att_type=att_type,
            att_heads=att_heads,
            att_mid_dim=att_mid_dim,
            att_mid_drop=att_mid_drop)
        self.cross_dropout = nn.Dropout(dropout)
        self.layer_norm_cross = torch.nn.LayerNorm(embed_dim)

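        # only non-final layers build the bi-feature embedding and the
        # feed-forward block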
        if not self.last_layer:
            self.bifeat_emb = nn.Sequential(
                nn.Linear(2 * embed_dim, embed_dim),
                utils.activation(bifeat_emb_act),
                nn.Dropout(bifeat_emb_drop)
            )
            self.layer_norm_x = torch.nn.LayerNorm(embed_dim)

            self.ff_layer = blocks.create(
                'FeedForward',
                embed_dim=embed_dim,
                ffn_embed_dim=embed_dim * 4,
                relu_dropout=ff_dropout,
                dropout=ff_dropout)

        self.layer_norm_gx = torch.nn.LayerNorm(embed_dim)
Example No. 8
    def __init__(self):
        super(XTransformer, self).__init__()
        self.vocab_size = cfg.MODEL.VOCAB_SIZE + 1

        # att_feats encoder
        sequential = []
        sequential.append(
            nn.Linear(cfg.MODEL.ATT_FEATS_DIM, cfg.MODEL.ATT_FEATS_EMBED_DIM))
        sequential.append(utils.activation(cfg.MODEL.ATT_FEATS_EMBED_ACT))
        if cfg.MODEL.ATT_FEATS_NORM:
            sequential.append(nn.LayerNorm(cfg.MODEL.ATT_FEATS_EMBED_DIM))
        if cfg.MODEL.DROPOUT_ATT_EMBED > 0:
            sequential.append(nn.Dropout(cfg.MODEL.DROPOUT_ATT_EMBED))
        self.att_embed = nn.Sequential(
            *sequential) if len(sequential) > 0 else None

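        # bilinear encoder/decoder stacks configured from cfg.MODEL.BILINEAR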
        self.encoder = Encoder(
            embed_dim=cfg.MODEL.BILINEAR.DIM,
            dropout=cfg.MODEL.BILINEAR.ENCODE_DROPOUT,
            att_type=cfg.MODEL.BILINEAR.ATTTYPE,
            att_heads=cfg.MODEL.BILINEAR.HEAD,
            att_mid_dim=cfg.MODEL.BILINEAR.ENCODE_ATT_MID_DIM,
            att_mid_drop=cfg.MODEL.BILINEAR.ENCODE_ATT_MID_DROPOUT,
            bifeat_emb_act=cfg.MODEL.BILINEAR.BIFEAT_EMB_ACT,
            bifeat_emb_drop=cfg.MODEL.BILINEAR.ENCODE_BIFEAT_EMB_DROPOUT,
            ff_dropout=cfg.MODEL.BILINEAR.ENCODE_FF_DROPOUT,
            layer_num=cfg.MODEL.BILINEAR.ENCODE_LAYERS)

        self.decoder = Decoder(
            vocab_size=self.vocab_size,
            embed_dim=cfg.MODEL.BILINEAR.DIM,
            dropout=cfg.MODEL.BILINEAR.DECODE_DROPOUT,
            att_type=cfg.MODEL.BILINEAR.ATTTYPE,
            att_heads=cfg.MODEL.BILINEAR.HEAD,
            att_mid_dim=cfg.MODEL.BILINEAR.DECODE_ATT_MID_DIM,
            att_mid_drop=cfg.MODEL.BILINEAR.DECODE_ATT_MID_DROPOUT,
            bifeat_emb_act=cfg.MODEL.BILINEAR.BIFEAT_EMB_ACT,
            bifeat_emb_drop=cfg.MODEL.BILINEAR.DECODE_BIFEAT_EMB_DROPOUT,
            ff_dropout=cfg.MODEL.BILINEAR.DECODE_FF_DROPOUT,
            layer_num=cfg.MODEL.BILINEAR.DECODE_LAYERS)