예제 #1
0
파일: model.py 프로젝트: zqyuan/r2c
    def __init__(self,
                 vocab: Vocabulary,
                 span_encoder: Seq2SeqEncoder,
                 reasoning_encoder: Seq2SeqEncoder,
                 input_dropout: float = 0.3,
                 hidden_dim_maxpool: int = 1024,
                 class_embs: bool=True,
                 reasoning_use_obj: bool=True,
                 reasoning_use_answer: bool=True,
                 reasoning_use_question: bool=True,
                 pool_reasoning: bool = True,
                 pool_answer: bool = True,
                 pool_question: bool = False,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 ):
        super(AttentionQA, self).__init__(vocab)

        self.detector = SimpleDetector(pretrained=True, average_pool=True, semantic=class_embs, final_dim=512)
        ###################################################################################################

        self.rnn_input_dropout = TimeDistributed(InputVariationalDropout(input_dropout)) if input_dropout > 0 else None

        self.span_encoder = TimeDistributed(span_encoder)
        self.reasoning_encoder = TimeDistributed(reasoning_encoder)

        self.span_attention = BilinearMatrixAttention(
            matrix_1_dim=span_encoder.get_output_dim(),
            matrix_2_dim=span_encoder.get_output_dim(),
        )

        self.obj_attention = BilinearMatrixAttention(
            matrix_1_dim=span_encoder.get_output_dim(),
            matrix_2_dim=self.detector.final_dim,
        )

        self.reasoning_use_obj = reasoning_use_obj
        self.reasoning_use_answer = reasoning_use_answer
        self.reasoning_use_question = reasoning_use_question
        self.pool_reasoning = pool_reasoning
        self.pool_answer = pool_answer
        self.pool_question = pool_question
        dim = sum([d for d, to_pool in [(reasoning_encoder.get_output_dim(), self.pool_reasoning),
                                        (span_encoder.get_output_dim(), self.pool_answer),
                                        (span_encoder.get_output_dim(), self.pool_question)] if to_pool])

        self.final_mlp = torch.nn.Sequential(
            torch.nn.Dropout(input_dropout, inplace=False),
            torch.nn.Linear(dim, hidden_dim_maxpool),
            torch.nn.ReLU(inplace=True),
            torch.nn.Dropout(input_dropout, inplace=False),
            torch.nn.Linear(hidden_dim_maxpool, 1),
        )
        self._accuracy = CategoricalAccuracy()
        self._loss = torch.nn.CrossEntropyLoss()
        initializer(self)
예제 #2
0
 def __init__(self, v_dim, q_dim, num_hid, norm='weight', act='LeakyReLU', dropout=0.3):
     super(Att_Bilinear_layer2_keycat_textual_visual, self).__init__()
     # norm_layer = get_norm(norm)
     self.obj_attention = BilinearMatrixAttention(
         matrix_1_dim=q_dim,
         matrix_2_dim=v_dim,
         # matrix_2_dim=512
     )
     self.num_hid = num_hid
예제 #3
0
    def __init__(self,
                 pretrained=True,
                 average_pool=True,
                 semantic=True,
                 final_dim=512,
                 input_dropout=0.3,
                 reasoning_use_obj=True,
                 reasoning_use_answer=True,
                 reasoning_use_question=True,
                 pool_reasoning=True,
                 pool_answer=True,
                 pool_question=True):
        super().__init__()

        # self.detector = SimpleDetector(pretrained=pretrained,
        #   average_pool=average_pool, semantic=semantic, final_dim=final_dim)
        self.reasoning_encoder = TimeDistributed(
            PytorchSeq2SeqWrapper(
                torch.nn.LSTM(1536,
                              256,
                              num_layers=2,
                              batch_first=True,
                              bidirectional=True)))
        self.rnn_input_dropout = TimeDistributed(
            InputVariationalDropout(
                input_dropout)) if input_dropout > 0 else None
        self.span_attention = BilinearMatrixAttention(
            matrix_1_dim=final_dim,
            matrix_2_dim=final_dim,
        )

        self.obj_attention = BilinearMatrixAttention(
            matrix_1_dim=final_dim,
            matrix_2_dim=final_dim,
        )
        self.reasoning_use_obj = reasoning_use_obj
        self.reasoning_use_answer = reasoning_use_answer
        self.reasoning_use_question = reasoning_use_question
        self.pool_reasoning = pool_reasoning
        self.pool_answer = pool_answer
        self.pool_question = pool_question

        InitializerApplicator(self)
 def test_forward_does_a_bilinear_product(self):
     params = Params({"matrix_1_dim": 2, "matrix_2_dim": 2})
     bilinear = BilinearMatrixAttention.from_params(params)
     bilinear._weight_matrix = Parameter(torch.FloatTensor([[-0.3, 0.5], [2.0, -1.0]]))
     bilinear._bias = Parameter(torch.FloatTensor([0.1]))
     a_vectors = torch.FloatTensor([[[1, 1], [2, 2]]])
     b_vectors = torch.FloatTensor([[[1, 0], [0, 1]]])
     result = bilinear(a_vectors, b_vectors).detach().numpy()
     assert result.shape == (1, 2, 2)
     assert_almost_equal(result, [[[1.8, -0.4], [3.5, -0.9]]])
 def test_forward_does_a_bilinear_product(self):
     params = Params({
             'matrix_1_dim': 2,
             'matrix_2_dim': 2,
             })
     bilinear = BilinearMatrixAttention.from_params(params)
     bilinear._weight_matrix = Parameter(torch.FloatTensor([[-.3, .5], [2.0, -1.0]]))
     bilinear._bias = Parameter(torch.FloatTensor([.1]))
     a_vectors = torch.FloatTensor([[[1, 1], [2, 2]]])
     b_vectors = torch.FloatTensor([[[1, 0], [0, 1]]])
     result = bilinear(a_vectors, b_vectors).detach().numpy()
     assert result.shape == (1, 2, 2)
     assert_almost_equal(result, [[[1.8, -.4], [3.5, -.9]]])
예제 #6
0
    def __init__(
        self,
        vocab: Vocabulary,
        text_field_embedder: TextFieldEmbedder,
        encoder: Seq2SeqEncoder,
        attended_encoder: Seq2SeqEncoder,
        output_feedforward: FeedForward,
        regularizer: Optional[RegularizerApplicator] = None,
        detector_final_dim: int = 512,
        dropout: float = 0.5,
        initializer: InitializerApplicator = InitializerApplicator()
    ) -> None:
        """

        :param vocab:
        :param text_field_embedder:
        :param encoder:
        :param attended_encoder:
        :param output_feedforward:
        :param regularizer:
        :param detector_final_dim:
        :param dropout:
        :param initializer:
        """
        super().__init__(vocab, regularizer)

        self._text_field_embedder = text_field_embedder
        self._encoder = encoder
        self.detector = ROIDetector(detector_final_dim)

        if dropout:
            self.dropout = nn.Dropout(dropout)
            self.rnn_input_dropout = InputVariationalDropout(dropout)
        else:
            self.dropout = None
            self.rnn_input_dropout = None

        self.obj_attention = BilinearMatrixAttention(
            matrix_1_dim=encoder.get_output_dim(),
            matrix_2_dim=self.detector.final_dim)

        self._attended_encoder = attended_encoder

        self._output_feedforward = output_feedforward

        self._accuracy = CategoricalAccuracy()
        self._loss = nn.CrossEntropyLoss()
        initializer(self)
예제 #7
0
 def test_forward_does_a_bilinear_product_when_using_biases(self):
     params = Params({
             u'matrix_1_dim': 2,
             u'matrix_2_dim': 2,
             u'use_input_biases': True
             })
     bilinear = BilinearMatrixAttention.from_params(params)
     bilinear._weight_matrix = Parameter(torch.FloatTensor([[-.3, .5, 1.0],
                                                            [2.0, -1.0, -1.0],
                                                            [1.0, 0.5, 1.0]]))
     bilinear._bias = Parameter(torch.FloatTensor([.1]))
     a_vectors = torch.FloatTensor([[[1, 1], [2, 2]]])
     b_vectors = torch.FloatTensor([[[1, 0], [0, 1]]])
     result = bilinear(a_vectors, b_vectors).detach().numpy()
     assert result.shape == (1, 2, 2)
     assert_almost_equal(result, [[[3.8, 1.1], [5.5, 0.6]]])
 def test_forward_does_a_bilinear_product_when_using_biases(self):
     params = Params({
             'matrix_1_dim': 2,
             'matrix_2_dim': 2,
             'use_input_biases': True
             })
     bilinear = BilinearMatrixAttention.from_params(params)
     bilinear._weight_matrix = Parameter(torch.FloatTensor([[-.3, .5, 1.0],
                                                            [2.0, -1.0, -1.0],
                                                            [1.0, 0.5, 1.0]]))
     bilinear._bias = Parameter(torch.FloatTensor([.1]))
     a_vectors = torch.FloatTensor([[[1, 1], [2, 2]]])
     b_vectors = torch.FloatTensor([[[1, 0], [0, 1]]])
     result = bilinear(a_vectors, b_vectors).detach().numpy()
     assert result.shape == (1, 2, 2)
     assert_almost_equal(result, [[[3.8, 1.1], [5.5, 0.6]]])
예제 #9
0
 def __init__(self, config):
     super(RobertaForRopes, self).__init__(config)
     self.roberta = RobertaModel(config)
     self.find_object1 = MLP(config.hidden_size, 1)
     self.find_object2 = MLP(config.hidden_size, 1)
     self.find_TP = MLP(config.hidden_size, 1)
     self.bb_matrix_attention = LinearMatrixAttention(
         tensor_1_dim=config.hidden_size,
         tensor_2_dim=config.hidden_size,
         combination="x,y,x*y")
     self.bs_bilinear_imilairty = BilinearMatrixAttention(
         matrix_1_dim=config.hidden_size, matrix_2_dim=config.hidden_size)
     self.ss_matrix_attention = LinearMatrixAttention(
         tensor_1_dim=config.hidden_size,
         tensor_2_dim=config.hidden_size,
         combination="x,y,x*y")
     self.rel_SPo1_SPo2 = Relevance(config)
     self.pol_TP_SP = MLP(config.hidden_size * 2, 2)
     # self.rel_TPo1_TPo2 = None
     # self.answer_according_to_question = None
     self.init_weights()
예제 #10
0
    def __init__(self, args):
        super().__init__(args)

        self.n_graph_attn_composition_layers = args.n_graph_attn_composition_layers
        self.output_size = self.transformer.config.hidden_size
        self.graph_dim = args.graph_dim

        if self.use_semantic_graph:
            self.emb_proj = nn.Linear(self.transformer.config.hidden_size,
                                      self.graph_dim)

            def get_gnn_instance(n_layers):
                return RGCN(
                    num_bases=args.graph_n_bases,
                    h_dim=self.graph_dim,
                    num_relations=self.num_relations,
                    num_hidden_layers=n_layers,
                    dropout=args.graph_dropout,
                    activation=self.activation,
                )

            self.rgcn = get_gnn_instance(args.n_graph_layers)
            if self.n_graph_attn_composition_layers > 0:
                self.composition_rgcn = get_gnn_instance(
                    self.n_graph_attn_composition_layers)

            self.attn_biaffine = BilinearMatrixAttention(self.graph_dim,
                                                         self.graph_dim,
                                                         use_input_biases=True)
            self.attn_proj = nn.Linear(4 * self.graph_dim, self.graph_dim)

            self.graph_output_proj = nn.Linear(self.graph_dim, self.graph_dim)
            self.output_size += (2 if self.is_sentence_pair_task else
                                 1) * self.graph_dim

            if self.args.post_combination_layernorm:
                self.post_combination_layernorm = nn.LayerNorm(
                    self.output_size,
                    eps=self.transformer.config.layer_norm_eps)
예제 #11
0
    def __init__(self, vocab: Vocabulary,
                 text_encoder: Seq2SeqEncoder,
                 word_embedder: TextFieldEmbedder,
                 enable_training_log: bool = False,
                 inp_drop_rate: float = 0.2,
                 out_drop_rate: float = 0.2,
                 loss_weights: List = (0.2, 0.4, 0.4),
                 super_mode: str = 'before',
                 backbone: str = 'unet',
                 unet_down_channel: int = 256,
                 feature_sel: int = 127):
        super(UnifiedFollowUp, self).__init__(vocab)
        self.text_encoder = text_encoder
        self.word_embedder = word_embedder

        """
        Define model arch choices
        """
        self.backbone = backbone

        # input dropout
        if inp_drop_rate > 0:
            self.var_inp_dropout = InputVariationalDropout(p=inp_drop_rate)
        else:
            self.var_inp_dropout = lambda x: x
        # output dropout
        if out_drop_rate > 0:
            self.var_out_dropout = InputVariationalDropout(p=out_drop_rate)
        else:
            self.var_out_dropout = lambda x: x

        self.hidden_size = text_encoder.get_output_dim() // 2 if text_encoder.is_bidirectional() \
            else text_encoder.get_output_dim()

        self.output_size = text_encoder.get_output_dim()

        # ele -> element wise multiply
        # dot -> dot product
        # cos -> cosine similarity
        # emb_dot -> embedding dot product
        # emb_cos -> embedding cosine similarity
        # linear -> linear similarity
        # bilinear -> bilinear similarity

        feature_sel = feature_sel
        sel_arr = "{0:07b}".format(int(feature_sel))
        nni_choices = ['ele', 'dot', 'cos', 'emb_dot', 'emb_cos', 'linear', 'bilinear']

        self.segment_choices = [nni_choices[i] for i in range(7) if sel_arr[i] == '1']
        # if expand bi-direction, we will regard forward/backward as two channels
        self.expand_bidir = False

        self.similar_function = ModuleDict({
            'ele': ElementWiseMatrixAttention(),
            'dot': DotProductMatrixAttention(),
            'cos': CosineMatrixAttention(),
            'emb_dot': DotProductMatrixAttention(),
            'emb_cos': CosineMatrixAttention(),
            'bilinear': BilinearMatrixAttention(matrix_1_dim=self.output_size, matrix_2_dim=self.output_size),
            'linear': LinearMatrixAttention(tensor_1_dim=self.output_size, tensor_2_dim=self.output_size)
        })

        self.attn_channel = 0
        for choice in self.segment_choices:
            if choice == 'ele':
                self.attn_channel += self.output_size
            elif choice in ['dot', 'cos', 'emb_dot', 'emb_cos', 'bilinear', 'linear']:
                if self.expand_bidir:
                    self.attn_channel += 2
                else:
                    self.attn_channel += 1

        self.class_mapping: Dict[str, int] = get_class_mapping(super_mode=super_mode)

        # Here we have two choices now, one is MLP, and another is UNet
        if self.backbone == 'unet':
            self.segmentation_net = AttentionUNet(input_channels=self.attn_channel,
                                                  class_number=len(self.class_mapping.keys()),
                                                  down_channel=unet_down_channel)
        else:
            raise Exception("Currently we do not support for other arches.")

        class_zero_weight = loss_weights[0]
        class_one_weight = loss_weights[1]

        self.register_buffer('weight_tensor', torch.tensor([class_zero_weight, class_one_weight,
                                                            1 - class_zero_weight - class_one_weight]))
        self.loss = nn.CrossEntropyLoss(ignore_index=-1,
                                        weight=self.weight_tensor)

        # initialize metrics measurement
        self.metrics = {'ROUGE': BatchAverage(),
                        '_ROUGE1': BatchAverage(),
                        '_ROUGE2': BatchAverage(),
                        # TODO: You can speed up the code by disable BLEU since
                        #  the corpus-based BLEU metric is much time-consuming.
                        'BLEU': CorpusBLEUMetric(),
                        'EM': BatchAverage(),
                        'F1': FScoreMetric(prefix="1"),
                        'F2': FScoreMetric(prefix="2"),
                        'F3': FScoreMetric(prefix="3")}

        parameter_num = count_parameters(self)
        print(parameter_num)

        self.min_width = 8
        self.min_height = 8
        self.enable_training_log = enable_training_log
예제 #12
0
    combination='x,y', activation=tanh)
output = attention(vector, matrix)
print('Output from LinearAttention:', output)

# MatrixAttention
sequence_length1 = 10
sequence_length2 = 15

# dot product attention only allows matrices of the same size
matrix1 = torch.rand((batch_size, sequence_length1, embedding_dim1))
matrix2 = torch.rand((batch_size, sequence_length2, embedding_dim1))

matrix_attention = DotProductMatrixAttention()
output = matrix_attention(matrix1, matrix2)
print('Output shape of DotProductMatrixAttention:', output.shape)

# bilinear & linear attention allows inputs of different sizes
matrix1 = torch.rand((1, sequence_length1, embedding_dim1))
matrix2 = torch.rand((1, sequence_length2, embedding_dim2))

matrix_attention = BilinearMatrixAttention(
    matrix_1_dim=embedding_dim1, matrix_2_dim=embedding_dim2)
output = matrix_attention(matrix1, matrix2)
print('Output shape of BilinearMatrixAttention:', output.shape)

matrix_attention = LinearMatrixAttention(
    tensor_1_dim=embedding_dim1, tensor_2_dim=embedding_dim2,
    combination='x,y', activation=tanh)
output = matrix_attention(matrix1, matrix2)
print('Output shape of LinearMatrixAttention:', output.shape)
def train_valid_base_text_model(model_name):
    """

    :param model_name: the full model name to use
    :return:
    """
    token_indexer = {"tokens": ELMoTokenCharactersIndexer()}

    def tokenizer(x: str):
        return [
            w.text for w in SpacyWordSplitter(language='en_core_web_sm',
                                              pos_tags=False).split_words(x)
        ]

    reader = TextExpDataSetReader(token_indexers=token_indexer,
                                  tokenizer=tokenizer,
                                  add_numeric_data=False)
    train_instances = reader.read(train_data_file_path)
    validation_instances = reader.read(validation_data_file_path)
    vocab = Vocabulary()

    # TODO: change this if necessary
    # batch_size should be: 10 or 9 depends on the input
    # and not shuffle so all the data of the same pair will be in the same batch
    iterator = BasicIterator(
        batch_size=batch_size)  # , instances_per_epoch=10)
    #  sorting_keys=[('sequence_review', 'list_num_tokens')])
    iterator.index_with(vocab)

    options_file = 'https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x1024_128_2048cnn_1xhighway/' \
                   'elmo_2x1024_128_2048cnn_1xhighway_options.json'
    weight_file = 'https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x1024_128_2048cnn_1xhighway/' \
                  'elmo_2x1024_128_2048cnn_1xhighway_weights.hdf5'

    # TODO: check the output of this
    # elmo_embedder = Elmo(options_file, weight_file, num_output_representations=2)
    # word_embeddings = elmo_embedder
    elmo_embedder = ElmoTokenEmbedder(options_file, weight_file)
    word_embeddings = BasicTextFieldEmbedder({"tokens": elmo_embedder})
    review_attention_layer = models.AttentionSoftMaxLayer(
        BilinearMatrixAttention(word_embeddings.get_output_dim(),
                                word_embeddings.get_output_dim()))
    seq_attention_layer = models.AttentionSoftMaxLayer(
        DotProductMatrixAttention())

    feed_forward = FeedForward(input_dim=batch_size,
                               num_layers=2,
                               hidden_dims=[batch_size, 1],
                               activations=ReLU(),
                               dropout=[0.2, 0.0])
    fc_review_rep = FeedForward(input_dim=124,
                                num_layers=1,
                                hidden_dims=[10],
                                activations=ReLU())

    criterion = nn.MSELoss()

    metrics_dict = {
        'mean_absolute_error': MeanAbsoluteError(),
    }

    model = models.BasicTextModel(
        word_embedding=word_embeddings,
        review_representation_layer=review_attention_layer,
        seq_representation_layer=seq_attention_layer,
        vocab=vocab,
        criterion=criterion,
        metrics_dict=metrics_dict,
        classifier_feedforward=feed_forward,
        fc_review_rep=fc_review_rep)

    optimizer = optim.Adam(model.parameters(), lr=0.1)
    num_epochs = 2

    run_log_directory = utils.set_folder(
        datetime.now().strftime(
            f'{model_name}_{num_epochs}_epochs_%d_%m_%Y_%H_%M_%S'), 'logs')

    if not os.path.exists(run_log_directory):
        os.makedirs(run_log_directory)

    trainer = Trainer(
        model=model,
        optimizer=optimizer,
        iterator=iterator,
        train_dataset=train_instances,
        validation_dataset=validation_instances,
        num_epochs=num_epochs,
        shuffle=False,
        serialization_dir=run_log_directory,
        patience=10,
        histogram_interval=10,
    )

    model_dict = trainer.train()

    print(f'{model_name}: evaluation measures are:')
    for key, value in model_dict.items():
        print(f'{key}: {value}')
예제 #14
0
    def __init__(
            self,
            vocab: Vocabulary,
            span_encoder: Seq2SeqEncoder,
            reasoning_encoder: Seq2SeqEncoder,
            input_dropout: float = 0.3,
            hidden_dim_maxpool: int = 1024,
            class_embs: bool = True,
            reasoning_use_obj: bool = True,
            reasoning_use_answer: bool = True,
            reasoning_use_question: bool = True,
            pool_reasoning: bool = True,
            pool_answer: bool = True,
            pool_question: bool = False,
            initializer: InitializerApplicator = InitializerApplicator(),
    ):
        super(MultiHopAttentionQAFreezeDetRes101, self).__init__(vocab)

        self.detector = SimpleDetector(pretrained=True,
                                       average_pool=True,
                                       semantic=False,
                                       final_dim=512)
        ###################################################################################################

        # freeze everything related to conv net
        for submodule in self.detector.backbone.modules():
            # if isinstance(submodule, BatchNorm2d):
            # submodule.track_running_stats = False
            for p in submodule.parameters():
                p.requires_grad = False

        for submodule in self.detector.after_roi_align.modules():
            # if isinstance(submodule, BatchNorm2d):
            # submodule.track_running_stats = False
            for p in submodule.parameters():
                p.requires_grad = False

        self.rnn_input_dropout = TimeDistributed(
            InputVariationalDropout(
                input_dropout)) if input_dropout > 0 else None

        self.span_encoder = TimeDistributed(span_encoder)
        self.reasoning_encoder = TimeDistributed(reasoning_encoder)

        self.span_attention = BilinearMatrixAttention(
            matrix_1_dim=span_encoder.get_output_dim(),
            matrix_2_dim=span_encoder.get_output_dim(),
        )

        self.obj_attention = BilinearMatrixAttention(
            matrix_1_dim=span_encoder.get_output_dim(),
            matrix_2_dim=self.detector.final_dim,
        )

        self.reasoning_use_obj = reasoning_use_obj
        self.reasoning_use_answer = reasoning_use_answer
        self.reasoning_use_question = reasoning_use_question
        self.pool_reasoning = pool_reasoning
        self.pool_answer = pool_answer
        self.pool_question = pool_question
        dim = sum([
            d for d, to_pool in [(
                reasoning_encoder.get_output_dim(), self.pool_reasoning
            ), (span_encoder.get_output_dim(), self.pool_answer
                ), (span_encoder.get_output_dim(), self.pool_question)]
            if to_pool
        ])

        self.final_mlp = torch.nn.Sequential(
            torch.nn.Dropout(input_dropout, inplace=False),
            torch.nn.Linear(dim, hidden_dim_maxpool),
            torch.nn.ReLU(inplace=True),
            torch.nn.Dropout(input_dropout, inplace=False),
            torch.nn.Linear(hidden_dim_maxpool, 1),
        )
        self._accuracy = CategoricalAccuracy()
        self._loss = torch.nn.CrossEntropyLoss()
        initializer(self)
예제 #15
0
 def __init__(self, config):
     super(Relevance, self).__init__()
     self.logits_SPo1 = BilinearMatrixAttention(
         matrix_1_dim=config.hidden_size, matrix_2_dim=config.hidden_size)
     self.logits_SPo2 = BilinearMatrixAttention(
         matrix_1_dim=config.hidden_size, matrix_2_dim=config.hidden_size)
예제 #16
0
파일: model_kv.py 프로젝트: viswanathgs/r2c
    def __init__(
        self,
        span_encoder: Seq2SeqEncoder,
        input_dropout: float = 0.3,
        class_embs: bool = True,
        initializer: InitializerApplicator = InitializerApplicator(),
        learned_omcs: dict = {},
    ):
        # VCR dataset becomes unpicklable due to VCR.vocab, but we don't need
        # to pass in vocab from the dataset anyway as the BERT embeddings are
        # pretrained and stored in h5 files per dataset instance. Just pass
        # a dummy vocab instance for init.
        vocab = Vocabulary()
        super(KeyValueAttentionTrunk, self).__init__(vocab)

        self.detector = SimpleDetector(pretrained=True,
                                       average_pool=True,
                                       semantic=class_embs,
                                       final_dim=512)

        self.rnn_input_dropout = TimeDistributed(
            InputVariationalDropout(
                input_dropout)) if input_dropout > 0 else None

        self.span_encoder = TimeDistributed(span_encoder)
        span_dim = span_encoder.get_output_dim()

        self.question_mlp = torch.nn.Sequential(
            # 2 (bidirectional) * 4 (num_answers) * dim -> dim
            torch.nn.Linear(8 * span_dim, span_dim),
            torch.nn.Tanh(),
        )
        self.answer_mlp = torch.nn.Sequential(
            # 2 (bidirectional) * dim -> 2 (key-value) * dim
            torch.nn.Linear(2 * span_dim, 2 * span_dim),
            torch.nn.Tanh(),
        )
        self.obj_mlp = torch.nn.Sequential(
            # obj_dim -> 2 (key-value) * dim
            torch.nn.Linear(self.detector.final_dim, 2 * span_dim),
            torch.nn.Tanh(),
        )

        self.span_attention = BilinearMatrixAttention(
            matrix_1_dim=span_encoder.get_output_dim(),
            matrix_2_dim=span_encoder.get_output_dim(),
        )

        self.obj_attention = BilinearMatrixAttention(
            matrix_1_dim=span_encoder.get_output_dim(),
            matrix_2_dim=self.detector.final_dim,
        )

        self.kv_transformer = KeyValueTransformer(
            dim=span_dim,
            num_heads=8,
            num_steps=4,
        )

        self.omcs_index = None
        if learned_omcs.get('enabled', False):
            use_sentence_embs = learned_omcs.get('use_sentence_embeddings',
                                                 True)
            omcs_embs, self.omcs_index = self.load_omcs(use_sentence_embs)
            # Let's replicate the OMCS embeddings to each device to attend over them
            # after FAISS lookup. We could also do faiss.search_and_reconstruct, but
            # that prevents us from using quantized indices for faster search which
            # we might need to.
            self.register_buffer('omcs_embs', omcs_embs)
            self.omcs_mlp = torch.nn.Sequential(
                torch.nn.Linear(768, self.omcs_index.d), )
            self.k = learned_omcs.get('max_neighbors', 5)
            self.similarity_thresh = learned_omcs.get('similarity_thresh', 0.0)

        initializer(self)
예제 #17
0
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 text_encoder: Seq2SeqEncoder,
                 target_encoder: Seq2VecEncoder,
                 feedforward: Optional[FeedForward] = None,
                 target_field_embedder: Optional[TextFieldEmbedder] = None,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None,
                 dropout: float = 0.0,
                 target_scale: bool = False,
                 context_preserving: bool = False) -> None:
        '''
        :param vocab: vocab : A Vocabulary, required in order to compute sizes 
                              for input/output projections.
        :param text_field_embedder: Used to embed the text and target text if
                                    target_field_embedder is None but the 
                                    target_encoder is not None.
        :param text_encoder: Sequence Encoder that will create the 
                             representation of each token in the context 
                             sentence.
        :param target_encoder: Encoder that will create the representation of 
                               target text tokens.
        :param feedforward: An optional feed forward layer to apply after
                            either the text encoder if target encoder is None. 
                            Else it would be after the target and the text 
                            encoded representations have been concatenated.
        :param target_field_embedder: Used to embed the target text to give as 
                                      input to the target_encoder. Thus this 
                                      allows a seperate embedding for text and 
                                      target text.
        :param target_concat_text_embedding: Whether or not the target should be 
                                             concatenated to the each word 
                                             embedding within the text before 
                                             being encoded.
        :param initializer: Used to initialize the model parameters.
        :param regularizer: If provided, will be used to calculate the 
                            regularization penalty during training.
        :param word_dropout: Dropout that is applied after the embedding of the 
                             tokens/words. It will drop entire words with this 
                             probabilty.
        :param dropout: To apply dropout after each layer apart from the last 
                        layer. All dropout that is applied to timebased data 
                        will be `variational dropout`_ all else will be  
                        standard dropout.

        .. _variational dropout:
           https://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks.pdf
        '''
        super().__init__(vocab, regularizer)

        self.text_field_embedder = text_field_embedder
        self.target_field_embedder = target_field_embedder
        self.num_classes = self.vocab.get_vocab_size("labels")
        self.text_encoder = text_encoder
        self.target_encoder = TimeDistributed(target_encoder)
        self.feedforward = feedforward
        if self.feedforward:
            self.time_feedforward = TimeDistributed(self.feedforward)

        self.attention_layer = BilinearMatrixAttention(
            text_encoder.get_output_dim(), target_encoder.get_output_dim())
        # Whether to concat the encoded text representation with the weighted
        # representation from the attention
        self.context_preserving = context_preserving

        if feedforward is not None:
            output_dim = self.feedforward.get_output_dim()
        else:
            if self.context_preserving:
                output_dim = (text_encoder.get_output_dim() * 2)
            else:
                output_dim = text_encoder.get_output_dim()
        self.label_projection = TimeDistributed(
            Linear(output_dim, self.num_classes))
        self.metrics = {"accuracy": CategoricalAccuracy()}
        self.f1_metrics = {}
        # F1 Scores
        label_index_name = self.vocab.get_index_to_token_vocabulary('labels')
        for label_index, label_name in label_index_name.items():
            label_name = f'F1_{label_name.capitalize()}'
            self.f1_metrics[label_name] = F1Measure(label_index)

        self._variational_dropout = InputVariationalDropout(dropout)
        self._naive_dropout = Dropout(dropout)
        self._time_naive_dropout = TimeDistributed(self._naive_dropout)
        self._time_variational_dropout = TimeDistributed(
            self._variational_dropout)

        self.target_scale = target_scale

        self.loss = torch.nn.CrossEntropyLoss()

        check_dimensions_match(text_field_embedder.get_output_dim(),
                               text_encoder.get_input_dim(),
                               "text field embedding dim",
                               "text encoder input dim")
        # Ensure that the dimensions of the target or text field embedder and
        # the target encoder match
        target_field_embedder_dim = text_field_embedder.get_output_dim()
        target_field_error = "text field embedding dim"
        if self.target_field_embedder:
            target_field_embedder_dim = target_field_embedder.get_output_dim()
            target_field_error = "target field embedding dim"

        check_dimensions_match(target_field_embedder_dim,
                               target_encoder.get_input_dim(),
                               target_field_error, "target encoder input dim")
        initializer(self)
예제 #18
0
    def __init__(
            self,
            vocab: Vocabulary,
            span_encoder: Seq2SeqEncoder,
            reasoning_encoder: Seq2SeqEncoder,
            input_dropout: float = 0.3,
            hidden_dim_maxpool: int = 1024,
            class_embs: bool = True,
            reasoning_use_obj: bool = True,
            reasoning_use_answer: bool = True,
            reasoning_use_question: bool = True,
            pool_reasoning: bool = True,
            pool_answer: bool = True,
            pool_question: bool = False,
            reasoning_use_vision: bool = True,
            initializer: InitializerApplicator = InitializerApplicator(),
    ):

        super(AttentionQA, self).__init__(vocab)

        self.detector = SimpleDetector(pretrained=True,
                                       average_pool=True,
                                       semantic=class_embs,
                                       final_dim=512)
        self.extractor = SimpleExtractor(pretrained=True,
                                         num_classes=365,
                                         arch='resnet50')
        ###################################################################################################

        self.rnn_input_dropout = TimeDistributed(
            InputVariationalDropout(
                input_dropout)) if input_dropout > 0 else None

        self.span_encoder = TimeDistributed(span_encoder)
        self.reasoning_encoder = TimeDistributed(reasoning_encoder)

        # add scene classification visual feature and word embedding feature

        self.span_attention = BilinearMatrixAttention(
            matrix_1_dim=span_encoder.get_output_dim(),
            matrix_2_dim=span_encoder.get_output_dim(),
        )

        self.obj_attention = BilinearMatrixAttention(
            matrix_1_dim=span_encoder.get_output_dim(),
            matrix_2_dim=self.detector.final_dim,
        )

        self.reasoning_use_obj = reasoning_use_obj
        self.reasoning_use_answer = reasoning_use_answer
        self.reasoning_use_question = reasoning_use_question
        self.pool_reasoning = pool_reasoning
        self.pool_answer = pool_answer
        self.pool_question = pool_question
        self.reasoning_use_vision = reasoning_use_vision
        dim = sum([
            d for d, to_pool in [(
                reasoning_encoder.get_output_dim(), self.pool_reasoning
            ), (span_encoder.get_output_dim(), self.pool_answer
                ), (span_encoder.get_output_dim(), self.pool_question)]
            if to_pool
        ])

        self.projection = torch.nn.Conv2d(2048,
                                          self.detector.final_dim,
                                          kernel_size=1,
                                          stride=2,
                                          padding=1,
                                          bias=True)

        self.final_mlp = torch.nn.Sequential(
            torch.nn.Dropout(input_dropout, inplace=False),
            torch.nn.Linear(dim, hidden_dim_maxpool),
            torch.nn.ReLU(inplace=True),
            torch.nn.Dropout(input_dropout, inplace=False),
            torch.nn.Linear(hidden_dim_maxpool, 1),
        )
        self._accuracy = CategoricalAccuracy()

        # I want to replace the CrossEntropyLoss with LSR

        # self._loss = LabelSmoothingLoss(size=4,smoothing=0.1)
        self._loss = torch.nn.CrossEntropyLoss()
        initializer(self)
예제 #19
0
파일: model.py 프로젝트: viswanathgs/r2c
    def __init__(
        self,
        span_encoder: Seq2SeqEncoder,
        reasoning_encoder: Seq2SeqEncoder,
        input_dropout: float = 0.3,
        hidden_dim_maxpool: int = 1024,
        class_embs: bool = True,
        reasoning_use_obj: bool = True,
        reasoning_use_answer: bool = True,
        reasoning_use_question: bool = True,
        pool_reasoning: bool = True,
        pool_answer: bool = True,
        pool_question: bool = False,
        initializer: InitializerApplicator = InitializerApplicator(),
        learned_omcs: dict = {},
    ):
        # VCR dataset becomes unpicklable due to VCR.vocab, but we don't need
        # to pass in vocab from the dataset anyway as the BERT embeddings are
        # pretrained and stored in h5 files per dataset instance. Just pass
        # a dummy vocab instance for init.
        vocab = Vocabulary()
        super(AttentionQATrunk, self).__init__(vocab)

        self.detector = SimpleDetector(pretrained=True,
                                       average_pool=True,
                                       semantic=class_embs,
                                       final_dim=512)
        ###################################################################################################

        self.rnn_input_dropout = TimeDistributed(
            InputVariationalDropout(
                input_dropout)) if input_dropout > 0 else None

        self.span_encoder = TimeDistributed(span_encoder)
        self.reasoning_encoder = TimeDistributed(reasoning_encoder)

        self.span_attention = BilinearMatrixAttention(
            matrix_1_dim=span_encoder.get_output_dim(),
            matrix_2_dim=span_encoder.get_output_dim(),
        )

        self.obj_attention = BilinearMatrixAttention(
            matrix_1_dim=span_encoder.get_output_dim(),
            matrix_2_dim=self.detector.final_dim,
        )

        self.reasoning_use_obj = reasoning_use_obj
        self.reasoning_use_answer = reasoning_use_answer
        self.reasoning_use_question = reasoning_use_question
        self.pool_reasoning = pool_reasoning
        self.pool_answer = pool_answer
        self.pool_question = pool_question
        self.output_dim = sum([
            d for d, to_pool in [(
                reasoning_encoder.get_output_dim(), self.pool_reasoning
            ), (span_encoder.get_output_dim(), self.pool_answer
                ), (span_encoder.get_output_dim(), self.pool_question)]
            if to_pool
        ])

        self.omcs_index = None
        if learned_omcs.get('enabled', False):
            use_sentence_embs = learned_omcs.get('use_sentence_embeddings',
                                                 True)
            omcs_embs, self.omcs_index = self.load_omcs(use_sentence_embs)
            # Let's replicate the OMCS embeddings to each device to attend over them
            # after FAISS lookup. We could also do faiss.search_and_reconstruct, but
            # that prevents us from using quantized indices for faster search which
            # we might need to.
            self.register_buffer('omcs_embs', omcs_embs)
            self.omcs_mlp = torch.nn.Sequential(
                torch.nn.Linear(768, self.omcs_index.d), )
            self.k = learned_omcs.get('max_neighbors', 5)
            self.similarity_thresh = learned_omcs.get('similarity_thresh', 0.0)

        initializer(self)
    def __init__(
        self,
        question_encoding_dim: int,
        passage_encoding_dim: int,
        passage_attention_to_span: Seq2SeqEncoder,
        passage_startend_predictor,
        question_attention_to_span: Seq2SeqEncoder,
        passage_attention_to_count: Seq2SeqEncoder,
        num_implicit_nums: int = None,
        passage_count_predictor=None,
        passage_count_hidden2logits=None,
        dropout: float = 0.0,
    ):
        super().__init__()

        self.num_counts = 10

        self.passage_attention_scalingvals = [1, 2, 5, 10]

        # Parameters for answer start/end prediction from PassageAttention
        self.passage_attention_to_span = passage_attention_to_span
        self.passage_startend_predictor = passage_startend_predictor  # torch.nn.Linear(self.passage_attention_to_span.get_output_dim(), 2)

        # Parameters for answer start/end pred directly from passage encoding (direct PassageSpanAnswer from 1step prog)
        self.oneshot_psa_startend_predictor = torch.nn.Linear(
            passage_encoding_dim, 2)

        self.question_attention_to_span = question_attention_to_span
        self.question_startend_predictor = torch.nn.Linear(
            self.question_attention_to_span.get_output_dim(), 2)

        self.passage_attention_to_count = passage_attention_to_count
        # self.passage_count_predictor = torch.nn.Linear(self.passage_attention_to_count.get_output_dim(),
        #                                                self.num_counts)
        self.passage_count_predictor = passage_count_predictor
        # Linear from self.passage_attention_to_count.output_dim --> 1
        self.passage_count_hidden2logits = passage_count_hidden2logits

        self.dotprod_matrix_attn = DotProductMatrixAttention()

        self.implicit_num_embeddings = torch.nn.Parameter(
            torch.FloatTensor(num_implicit_nums, passage_encoding_dim))
        torch.nn.init.normal_(self.implicit_num_embeddings,
                              mean=0.0,
                              std=0.001)
        self.implicitnum_bilinear_attention = BilinearMatrixAttention(
            matrix_1_dim=passage_encoding_dim,
            matrix_2_dim=passage_encoding_dim)

        # self.filter_matrix_attention = LinearMatrixAttention(
        #     tensor_1_dim=question_encoding_dim, tensor_2_dim=passage_encoding_dim, combination="x,y,x*y"
        # )

        self.filter_matrix_attention = LinearMatrixAttention(
            tensor_1_dim=question_encoding_dim,
            tensor_2_dim=passage_encoding_dim,
            combination="x,y,x*y")

        self._endpoint_span_extractor = EndpointSpanExtractor(
            input_dim=passage_encoding_dim, combination="x,y")

        # We will sum the passage-token-repr to the weighted-q-repr - to use x*y combination
        self.relocate_matrix_attention = LinearMatrixAttention(
            tensor_1_dim=passage_encoding_dim,
            tensor_2_dim=passage_encoding_dim,
            combination="x,y,x*y")

        # This computes a passage_to_passage attention, hopefully, for each token, putting a weight on date tokens
        # that are related to it.
        self.passage_to_date_attention: MatrixAttention = BilinearMatrixAttention(
            matrix_1_dim=passage_encoding_dim,
            matrix_2_dim=passage_encoding_dim)

        self.passage_to_start_date_attention: MatrixAttention = BilinearMatrixAttention(
            matrix_1_dim=passage_encoding_dim,
            matrix_2_dim=passage_encoding_dim)

        self.passage_to_end_date_attention: MatrixAttention = BilinearMatrixAttention(
            matrix_1_dim=passage_encoding_dim,
            matrix_2_dim=passage_encoding_dim)

        # This computes a passage_to_passage attention, hopefully, for each token, putting a weight on date tokens
        # that are related to it.
        self.passage_to_num_attention: MatrixAttention = BilinearMatrixAttention(
            matrix_1_dim=passage_encoding_dim,
            matrix_2_dim=passage_encoding_dim)

        if dropout > 0:
            self._dropout = torch.nn.Dropout(p=dropout)
        else:
            self._dropout = lambda x: x
    def __init__(self,
                 question_encoding_dim: int,
                 passage_encoding_dim: int,
                 passage_attention_to_span: Seq2SeqEncoder,
                 question_attention_to_span: Seq2SeqEncoder,
                 passage_attention_to_count: Seq2SeqEncoder,
                 passage_count_predictor=None,
                 passage_count_hidden2logits=None,
                 dropout: float = 0.0):
        super().__init__()

        self.num_counts = 10

        self.passage_attention_scalingvals = [1, 2, 5, 10]

        self.passage_attention_to_span = passage_attention_to_span
        self.passage_startend_predictor = torch.nn.Linear(
            self.passage_attention_to_span.get_output_dim(), 2)

        self.question_attention_to_span = question_attention_to_span
        self.question_startend_predictor = torch.nn.Linear(
            self.question_attention_to_span.get_output_dim(), 2)

        self.passage_attention_to_count = passage_attention_to_count
        # self.passage_count_predictor = torch.nn.Linear(self.passage_attention_to_count.get_output_dim(),
        #                                                self.num_counts)
        self.passage_count_predictor = passage_count_predictor
        # Linear from self.passage_attention_to_count.output_dim --> 1
        self.passage_count_hidden2logits = passage_count_hidden2logits

        self.dotprod_matrix_attn = DotProductMatrixAttention()

        self.filter_matrix_attention = LinearMatrixAttention(
            tensor_1_dim=question_encoding_dim,
            tensor_2_dim=passage_encoding_dim,
            combination="x,y,x*y")

        # We will sum the passage-token-repr to the weighted-q-repr - to use x*y combination
        self.relocate_matrix_attention = LinearMatrixAttention(
            tensor_1_dim=passage_encoding_dim,
            tensor_2_dim=passage_encoding_dim,
            combination="x,y,x*y")

        # This computes a passage_to_passage attention, hopefully, for each token, putting a weight on date tokens
        # that are related to it.
        self.passage_to_date_attention: MatrixAttention = BilinearMatrixAttention(
            matrix_1_dim=passage_encoding_dim,
            matrix_2_dim=passage_encoding_dim)

        self.passage_to_start_date_attention: MatrixAttention = BilinearMatrixAttention(
            matrix_1_dim=passage_encoding_dim,
            matrix_2_dim=passage_encoding_dim)

        self.passage_to_end_date_attention: MatrixAttention = BilinearMatrixAttention(
            matrix_1_dim=passage_encoding_dim,
            matrix_2_dim=passage_encoding_dim)

        # This computes a passage_to_passage attention, hopefully, for each token, putting a weight on date tokens
        # that are related to it.
        self.passage_to_num_attention: MatrixAttention = BilinearMatrixAttention(
            matrix_1_dim=passage_encoding_dim,
            matrix_2_dim=passage_encoding_dim)

        if dropout > 0:
            self._dropout = torch.nn.Dropout(p=dropout)
        else:
            self._dropout = lambda x: x
def train_valid_base_text_decision_results_ep_model(
        model_name: str,
        single_round_label: bool,
        use_only_prev_round: bool,
        train_data_file_name: str,
        validation_data_file_name: str,
        no_history: bool = False):
    """
    This function train and validate model that use texts and numbers.
    :param: model_name: the full model name
    :param single_round_label: the label to use: single round of total payoff
    :param use_only_prev_round: if to use all the history or only the previous round
    :param train_data_file_name: the name of the train_data to use
    :param validation_data_file_name: the name of the validation_data to use
    :param no_history: if we don't want to use any history data
    :return:
    """
    token_indexer = {"tokens": ELMoTokenCharactersIndexer()}

    def tokenizer(x: str):
        return [
            w.text for w in SpacyWordSplitter(language='en_core_web_sm',
                                              pos_tags=False).split_words(x)
        ]

    reader = TextExpDataSetReader(token_indexers=token_indexer,
                                  tokenizer=tokenizer,
                                  add_numeric_data=True,
                                  use_only_prev_round=use_only_prev_round,
                                  single_round_label=single_round_label,
                                  three_losses=True,
                                  no_history=no_history)
    train_data_file_inner_path = os.path.join(data_directory,
                                              train_data_file_name)
    validation_data_file_inner_path = os.path.join(data_directory,
                                                   validation_data_file_name)
    train_instances = reader.read(train_data_file_inner_path)
    validation_instances = reader.read(validation_data_file_inner_path)
    vocab = Vocabulary()

    # TODO: change this if necessary
    # batch_size should be: 10 or 9 depends on the input
    # and not shuffle so all the data of the same pair will be in the same batch
    iterator = BasicIterator(batch_size=9)  # , instances_per_epoch=10)
    #  sorting_keys=[('sequence_review', 'list_num_tokens')])
    iterator.index_with(vocab)

    options_file = 'https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x1024_128_2048cnn_1xhighway/' \
                   'elmo_2x1024_128_2048cnn_1xhighway_options.json'
    weight_file = 'https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x1024_128_2048cnn_1xhighway/' \
                  'elmo_2x1024_128_2048cnn_1xhighway_weights.hdf5'

    # TODO: check the output of this
    # elmo_embedder = Elmo(options_file, weight_file, num_output_representations=2)
    # word_embeddings = elmo_embedder
    elmo_embedder = ElmoTokenEmbedder(options_file, weight_file)
    word_embeddings = BasicTextFieldEmbedder({"tokens": elmo_embedder})
    review_attention_layer =\
        models.AttentionSoftMaxLayer(BilinearMatrixAttention(word_embeddings.get_output_dim(), word_embeddings.get_output_dim()))
    seq_attention_layer = models.AttentionSoftMaxLayer(
        DotProductMatrixAttention())
    fc_review_rep_output_dim = reader.max_tokens_len
    fc_review_rep = FeedForward(input_dim=reader.max_tokens_len,
                                num_layers=1,
                                hidden_dims=[fc_review_rep_output_dim],
                                activations=ReLU())
    # seq_attention_layer = FeedForward(input_dim=)

    # numbers_lstm: Seq2VecEncoder = PytorchSeq2VecWrapper(nn.LSTM(2, 10, bidirectional=True, batch_first=True))
    # the shape of the flatten data rep
    feed_forward_input_dim = reader.max_seq_len * (fc_review_rep_output_dim +
                                                   reader.number_length)
    feed_forward_classification = FeedForward(input_dim=feed_forward_input_dim,
                                              num_layers=1,
                                              hidden_dims=[2],
                                              activations=ReLU(),
                                              dropout=[0.0])
    feed_forward_regression = FeedForward(input_dim=feed_forward_input_dim,
                                          num_layers=1,
                                          hidden_dims=[1],
                                          activations=ReLU(),
                                          dropout=[0.0])
    criterion_classification = nn.BCEWithLogitsLoss()
    criterion_regression = nn.MSELoss()

    metrics_dict = {
        "accuracy": CategoricalAccuracy(),
        # 'auc': Auc(),
        # 'F1measure': F1Measure(positive_label=1),
    }

    model = models.BasicTextDecisionResultModel(
        word_embedding=word_embeddings,
        review_representation_layer=review_attention_layer,
        seq_representation_layer=seq_attention_layer,
        vocab=vocab,
        classifier_feedforward_classification=feed_forward_classification,
        classifier_feedforward_regression=feed_forward_regression,
        fc_review_rep=fc_review_rep,
        criterion_classification=criterion_classification,
        criterion_regression=criterion_regression,
        metrics_dict=metrics_dict,
        add_numbers=True,
        max_tokens_len=reader.max_tokens_len,
    )

    optimizer = optim.Adam(model.parameters(), lr=0.1)
    num_epochs = 2

    run_log_directory = utils.set_folder(
        datetime.now().strftime(
            f'{model_name}_{num_epochs}_epochs_%d_%m_%Y_%H_%M_%S'), 'logs')

    trainer = Trainer(
        model=model,
        optimizer=optimizer,
        iterator=iterator,
        train_dataset=train_instances,
        validation_dataset=validation_instances,
        num_epochs=num_epochs,
        shuffle=False,
        serialization_dir=run_log_directory,
        patience=10,
        histogram_interval=10,
    )

    model_dict = trainer.train()

    print(f'{model_name}: evaluation measures are:')
    for key, value in model_dict.items():
        if 'accuracy' in key:
            value = value * 100
        print(f'{key}: {value}')

    # save the model predictions
    model.predictions.to_csv(os.path.join(run_log_directory,
                                          'predictions.csv'))
예제 #23
0
    def __init__(
            self,
            vocab: Vocabulary,
            span_encoder: Seq2SeqEncoder,
            reasoning_encoder: Seq2SeqEncoder,
            input_dropout: float = 0.1,
            hidden_dim_maxpool: int = 512,
            class_embs: bool = True,
            reasoning_use_obj: bool = True,
            reasoning_use_answer: bool = True,
            reasoning_use_question: bool = True,
            pool_reasoning: bool = True,
            pool_answer: bool = True,
            pool_question: bool = False,
            preload_path: str = "source_model.th",
            initializer: InitializerApplicator = InitializerApplicator(),
    ):
        super(AttentionQA, self).__init__(vocab)

        self.detector = SimpleDetector(pretrained=True,
                                       average_pool=True,
                                       semantic=class_embs,
                                       final_dim=512)
        ###################################################################################################

        self.rnn_input_dropout = TimeDistributed(
            InputVariationalDropout(
                input_dropout)) if input_dropout > 0 else None

        self.span_encoder = TimeDistributed(span_encoder)
        self.reasoning_encoder = TimeDistributed(reasoning_encoder)
        self.BiLSTM = TimeDistributed(MYLSTM(1280, 512, 256))
        self.source_encoder = TimeDistributed(source_LSTM(768, 256))

        self.span_attention = BilinearMatrixAttention(
            matrix_1_dim=span_encoder.get_output_dim(),
            matrix_2_dim=span_encoder.get_output_dim(),
        )
        self.span_attention_2 = BilinearMatrixAttention(
            matrix_1_dim=span_encoder.get_output_dim(),
            matrix_2_dim=span_encoder.get_output_dim(),
        )

        self.obj_attention = BilinearMatrixAttention(
            matrix_1_dim=span_encoder.get_output_dim(),
            matrix_2_dim=self.detector.final_dim,
        )

        self.obj_attention_2 = BilinearMatrixAttention(
            matrix_1_dim=span_encoder.get_output_dim(),
            matrix_2_dim=self.detector.final_dim,
        )

        self._matrix_attention = DotProductMatrixAttention()
        #self._matrix_attention = MatrixAttention(similarity_function)

        self.reasoning_use_obj = reasoning_use_obj
        self.reasoning_use_answer = reasoning_use_answer
        self.reasoning_use_question = reasoning_use_question
        self.pool_reasoning = pool_reasoning
        self.pool_answer = pool_answer
        self.pool_question = pool_question
        dim = sum([
            d for d, to_pool in [(
                reasoning_encoder.get_output_dim(), self.pool_reasoning
            ), (span_encoder.get_output_dim(), self.pool_answer
                ), (span_encoder.get_output_dim(), self.pool_question)]
            if to_pool
        ])

        self.final_mlp = torch.nn.Sequential(
            torch.nn.Dropout(input_dropout, inplace=False),
            torch.nn.Linear(dim, hidden_dim_maxpool),
            torch.nn.ReLU(inplace=True),
            torch.nn.Dropout(input_dropout, inplace=False),
            torch.nn.Linear(hidden_dim_maxpool, 1),
        )
        self.final_mlp_2 = torch.nn.Sequential(
            torch.nn.Dropout(input_dropout, inplace=False),
            torch.nn.Linear(dim, hidden_dim_maxpool),
            torch.nn.ReLU(inplace=True),
            torch.nn.Dropout(input_dropout, inplace=False),
            torch.nn.Linear(hidden_dim_maxpool, 1),
        )

        self.answer_BN = torch.nn.Sequential(BatchNorm1d(512))
        self.question_BN = torch.nn.Sequential(BatchNorm1d(512))
        self.source_answer_BN = torch.nn.Sequential(BatchNorm1d(512))
        self.source_question_BN = torch.nn.Sequential(BatchNorm1d(512))
        self.image_BN = BatchNorm1d(512)
        self.final_BN = torch.nn.Sequential(BatchNorm1d(512))
        self.final_mlp_linear = torch.nn.Sequential(torch.nn.Linear(512, 1))
        self.final_mlp_pool = torch.nn.Sequential(
            torch.nn.Linear(2560, 512),
            torch.nn.ReLU(inplace=True),
            torch.nn.Dropout(input_dropout, inplace=False),
        )

        self._accuracy = CategoricalAccuracy()
        self._loss = torch.nn.CrossEntropyLoss()
        initializer(self)

        if preload_path is not None:
            logger.info("Preloading!")
            preload = torch.load(preload_path)
            own_state = self.state_dict()
            for name, param in preload.items():
                #if name[0:8] == "_encoder":
                #    suffix = "._module."+name[9:]
                #    logger.info("preload paramter {}".format("span_encoder"+suffix))
                #    own_state["span_encoder"+suffix].copy_(param)
                #新引入的source_encoder
                if name[0:4] == "LSTM":
                    suffix = "._module" + name[4:]
                    logger.info("preload paramter {}".format("source_encoder" +
                                                             suffix))
                    own_state["source_encoder" + suffix].copy_(param)