Example #1
    def test_dot_product_similarity(self):
        # example use case: a batch of size 2,
        # with a time dimension (e.g. sentences of length 2), where each word is a vector of length 3.
        # it is compared against another input of the same shape
        output = DotProductMatrixAttention()(torch.FloatTensor([[[0, 0, 0], [4, 5, 6]],
                                                                [[-7, -8, -9], [10, 11, 12]]]),
                                             torch.FloatTensor([[[1, 2, 3], [4, 5, 6]],
                                                                [[7, 8, 9], [10, 11, 12]]]))

        # for the first batch there is
        #       no correlation between the first words of the input matrix
        #       but perfect correlation for the second word
        # for the second batch there is
        #       negative correlation for the first words
        #       positive correlation for the second word
        assert_almost_equal(output.numpy(), numpy.array([[[0, 0], [32, 77]], [[-194, -266], [266, 365]]]),
                            decimal=2)
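For reference, DotProductMatrixAttention takes the dot product between every row of the first matrix and every row of the second, which is equivalent to a batched matrix multiplication against the transposed second input. A minimal sketch (plain torch only) that reproduces the values asserted above:

import torch

m1 = torch.FloatTensor([[[0, 0, 0], [4, 5, 6]],
                        [[-7, -8, -9], [10, 11, 12]]])
m2 = torch.FloatTensor([[[1, 2, 3], [4, 5, 6]],
                        [[7, 8, 9], [10, 11, 12]]])

# (batch, rows_1, dim) x (batch, dim, rows_2) -> (batch, rows_1, rows_2)
similarities = torch.bmm(m1, m2.transpose(1, 2))
print(similarities)
# e.g. entry [0, 1, 1] is [4, 5, 6] . [4, 5, 6] = 16 + 25 + 36 = 77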
Example #3
    def __init__(self,
                 vocab: Vocabulary,
                 encoder_keys: List[str],
                 mask_key: str,
                 pair2vec_config_file: str,
                 pair2vec_model_file: str,
                 text_field_embedder: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder,
                 similarity_function: SimilarityFunction,
                 projection_feedforward: FeedForward,
                 inference_encoder: Seq2SeqEncoder,
                 output_feedforward: FeedForward,
                 output_logit: FeedForward,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 dropout: float = 0.5,
                 pair2vec_dropout: float = 0.0,
                 bidirectional_pair2vec: bool = True,
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super().__init__(vocab, regularizer)
        self._vocab = vocab
        self.pair2vec = util.get_pair2vec(pair2vec_config_file,
                                          pair2vec_model_file)
        self._encoder_keys = encoder_keys
        self._mask_key = mask_key
        self._text_field_embedder = text_field_embedder
        self._projection_feedforward = projection_feedforward
        self._encoder = encoder
        from allennlp.modules.matrix_attention import DotProductMatrixAttention

        self._matrix_attention = DotProductMatrixAttention()

        self._inference_encoder = inference_encoder
        self._pair2vec_dropout = torch.nn.Dropout(pair2vec_dropout)
        self._bidirectional_pair2vec = bidirectional_pair2vec

        if dropout:
            self.dropout = torch.nn.Dropout(dropout)
            self.rnn_input_dropout = VariationalDropout(dropout)
        else:
            self.dropout = None
            self.rnn_input_dropout = None

        self._output_feedforward = output_feedforward
        self._output_logit = output_logit

        self._num_labels = vocab.get_vocab_size(namespace="labels")

        self._accuracy = CategoricalAccuracy()
        self._loss = torch.nn.CrossEntropyLoss()

        initializer(self)
Example #4
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder,
                 similarity_function: MatrixAttention,
                 projection_feedforward: FeedForward,
                 inference_encoder: Seq2SeqEncoder,
                 output_feedforward: FeedForward,
                 output_logit: FeedForward,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 dropout: float = 0.5,
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super().__init__(vocab, regularizer)

        self._text_field_embedder = text_field_embedder
        self._encoder = encoder

        self._matrix_attention = DotProductMatrixAttention()
        self._projection_feedforward = projection_feedforward

        self._inference_encoder = inference_encoder

        if dropout:
            self.dropout = torch.nn.Dropout(dropout)
            self.rnn_input_dropout = VariationalDropout(dropout)
        else:
            self.dropout = None
            self.rnn_input_dropout = None

        self._output_feedforward = output_feedforward
        self._output_logit = output_logit

        self._num_labels = vocab.get_vocab_size(namespace="labels")

        check_dimensions_match(text_field_embedder.get_output_dim(),
                               encoder.get_input_dim(),
                               "text field embedding dim", "encoder input dim")
        check_dimensions_match(encoder.get_output_dim() * 4,
                               projection_feedforward.get_input_dim(),
                               "encoder output dim",
                               "projection feedforward input")
        check_dimensions_match(projection_feedforward.get_output_dim(),
                               inference_encoder.get_input_dim(),
                               "proj feedforward output dim",
                               "inference lstm input dim")

        self._accuracy = CategoricalAccuracy()
        self._loss = torch.nn.CrossEntropyLoss()

        initializer(self)
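The check on encoder.get_output_dim() * 4 above reflects the ESIM-style local inference step: each encoded sequence is concatenated with the representation it attends to in the other sequence, plus their difference and element-wise product, so the projection feedforward sees four times the encoder output dimension. A rough, self-contained sketch of that step (tensor sizes and variable names here are illustrative; it assumes AllenNLP's masked_softmax and weighted_sum helpers):

import torch
from allennlp.modules.matrix_attention import DotProductMatrixAttention
from allennlp.nn.util import masked_softmax, weighted_sum

batch, p_len, h_len, dim = 2, 5, 7, 4
p_encoded = torch.rand(batch, p_len, dim)   # stands in for encoder(premise)
h_encoded = torch.rand(batch, h_len, dim)   # stands in for encoder(hypothesis)
h_mask = torch.ones(batch, h_len, dtype=torch.bool)

similarity = DotProductMatrixAttention()(p_encoded, h_encoded)  # (batch, p_len, h_len)
p2h = masked_softmax(similarity, h_mask)                        # attention over hypothesis tokens
attended_h = weighted_sum(h_encoded, p2h)                       # (batch, p_len, dim)
enhanced_p = torch.cat([p_encoded, attended_h,
                        p_encoded - attended_h,
                        p_encoded * attended_h], dim=-1)
print(enhanced_p.shape)  # (batch, p_len, dim * 4) -- the projection_feedforward input size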
Example #5
    def __init__(self, vocab, modeling_layer, dropout=0.2, mask_lstms=True,
                 initializer=InitializerApplicator()):
        super(AttnPairEncoder, self).__init__(vocab)

        self._matrix_attention = DotProductMatrixAttention()
        self._modeling_layer = modeling_layer
        self.pad_idx = vocab.get_token_index(vocab._padding_token)

        d_out_model = modeling_layer.get_output_dim()
        self.output_dim = d_out_model

        self._dropout = torch.nn.Dropout(p=dropout) if dropout > 0 else lambda x: x
        self._mask_lstms = mask_lstms

        initializer(self)
Example #6
    def __init__(self,
                 pooler: Seq2VecEncoder,
                 knowledge_encoder: Seq2SeqEncoder = None):
        super().__init__()
        self.pooler = pooler
        pass_thru = PassThroughEncoder(pooler.get_input_dim())

        self.knowledge_encoder = TimeDistributed(
            knowledge_encoder or pass_thru)  # TimeDistributed(context_encoder)

        self.knowledge_attn = DotProductMatrixAttention()  # CosineMatrixAttention()
        # self.attn = DotProductMatrixAttention()

        self.input_dim = pooler.get_input_dim()
        self.output_dim = pooler.get_output_dim()
Example #7
    def __init__(
        self,
        vocab: Vocabulary,
        bert_model: Union[str, BertModel],
        embedding_dropout: float = 0.0,
        initializer: InitializerApplicator = InitializerApplicator(),
        regularizer: Optional[RegularizerApplicator] = None,
        label_smoothing: float = None,
        ignore_span_metric: bool = False,
        srl_eval_path: str = DEFAULT_SRL_EVAL_PATH,
        parser_path: str = "/home/rizwan/.allennlp/cache/elmo-allennlp_constituency_parser"
    ) -> None:
        super().__init__(vocab, regularizer)

        if isinstance(bert_model, str):
            self.bert_model = BertModel.from_pretrained(bert_model)
        else:
            self.bert_model = bert_model

        self.num_classes = self.vocab.get_vocab_size("labels")
        if srl_eval_path is not None:
            # For the span based evaluation, we don't want to consider labels
            # for verb, because the verb index is provided to the model.
            self.span_metric = SrlEvalScorer(srl_eval_path,
                                             ignore_classes=["V"])
        else:
            self.span_metric = None
        self.tag_projection_layer = Linear(
            2 * self.bert_model.config.hidden_size, self.num_classes)

        self.embedding_dropout = Dropout(p=embedding_dropout)
        self._label_smoothing = label_smoothing
        self.ignore_span_metric = ignore_span_metric

        device = 0 if torch.cuda.is_available() else -1
        self.parser = Predictor.from_path(parser_path, cuda_device=device)

        self.syntax_roberta = RobertaModel.from_pretrained(
            '../fairseq/checkpoints_768', 'checkpoint_best.pt')
        self.syntax_roberta.eval()

        self.matrix_attention = DotProductMatrixAttention()

        initializer(self)
Example #8
File: decom_att.py Project: dugu9sword/dne
    def __init__(self, vocab: Vocabulary, token_embedder: TokenEmbedder,
                 num_labels: int) -> None:
        super().__init__(vocab)

        self._text_field_embedder = BasicTextFieldEmbedder(
            {"tokens": token_embedder})
        dim = token_embedder.get_output_dim()
        self._attend_feedforward = TimeDistributed(
            FeedForward(dim, 1, 100, torch.nn.ReLU(), 0.2))
        self._matrix_attention = DotProductMatrixAttention()
        self._compare_feedforward = TimeDistributed(
            FeedForward(dim * 2, 1, 100, torch.nn.ReLU(), 0.2))

        # linear denotes "lambda x: x"

        self._aggregate_feedforward = FeedForward(200, 1, num_labels,
                                                  PassThrough(), 0.0)

        self._num_labels = num_labels

        self._accuracy = CategoricalAccuracy()
        self._loss = torch.nn.CrossEntropyLoss()
Example #9
    def __init__(
        self,
        question_encoding_dim: int,
        passage_encoding_dim: int,
        passage_attention_to_span: Seq2SeqEncoder,
        passage_startend_predictor,
        question_attention_to_span: Seq2SeqEncoder,
        passage_attention_to_count: Seq2SeqEncoder,
        num_implicit_nums: int = None,
        passage_count_predictor=None,
        passage_count_hidden2logits=None,
        dropout: float = 0.0,
    ):
        super().__init__()

        self.num_counts = 10

        self.passage_attention_scalingvals = [1, 2, 5, 10]

        # Parameters for answer start/end prediction from PassageAttention
        self.passage_attention_to_span = passage_attention_to_span
        self.passage_startend_predictor = passage_startend_predictor  # torch.nn.Linear(self.passage_attention_to_span.get_output_dim(), 2)

        # Parameters for answer start/end pred directly from passage encoding (direct PassageSpanAnswer from 1step prog)
        self.oneshot_psa_startend_predictor = torch.nn.Linear(
            passage_encoding_dim, 2)

        self.question_attention_to_span = question_attention_to_span
        self.question_startend_predictor = torch.nn.Linear(
            self.question_attention_to_span.get_output_dim(), 2)

        self.passage_attention_to_count = passage_attention_to_count
        # self.passage_count_predictor = torch.nn.Linear(self.passage_attention_to_count.get_output_dim(),
        #                                                self.num_counts)
        self.passage_count_predictor = passage_count_predictor
        # Linear from self.passage_attention_to_count.output_dim --> 1
        self.passage_count_hidden2logits = passage_count_hidden2logits

        self.dotprod_matrix_attn = DotProductMatrixAttention()

        self.implicit_num_embeddings = torch.nn.Parameter(
            torch.FloatTensor(num_implicit_nums, passage_encoding_dim))
        torch.nn.init.normal_(self.implicit_num_embeddings,
                              mean=0.0,
                              std=0.001)
        self.implicitnum_bilinear_attention = BilinearMatrixAttention(
            matrix_1_dim=passage_encoding_dim,
            matrix_2_dim=passage_encoding_dim)

        # self.filter_matrix_attention = LinearMatrixAttention(
        #     tensor_1_dim=question_encoding_dim, tensor_2_dim=passage_encoding_dim, combination="x,y,x*y"
        # )

        self.filter_matrix_attention = LinearMatrixAttention(
            tensor_1_dim=question_encoding_dim,
            tensor_2_dim=passage_encoding_dim,
            combination="x,y,x*y")

        self._endpoint_span_extractor = EndpointSpanExtractor(
            input_dim=passage_encoding_dim, combination="x,y")

        # We will sum the passage-token-repr to the weighted-q-repr - to use x*y combination
        self.relocate_matrix_attention = LinearMatrixAttention(
            tensor_1_dim=passage_encoding_dim,
            tensor_2_dim=passage_encoding_dim,
            combination="x,y,x*y")

        # This computes a passage_to_passage attention, hopefully, for each token, putting a weight on date tokens
        # that are related to it.
        self.passage_to_date_attention: MatrixAttention = BilinearMatrixAttention(
            matrix_1_dim=passage_encoding_dim,
            matrix_2_dim=passage_encoding_dim)

        self.passage_to_start_date_attention: MatrixAttention = BilinearMatrixAttention(
            matrix_1_dim=passage_encoding_dim,
            matrix_2_dim=passage_encoding_dim)

        self.passage_to_end_date_attention: MatrixAttention = BilinearMatrixAttention(
            matrix_1_dim=passage_encoding_dim,
            matrix_2_dim=passage_encoding_dim)

        # This computes a passage_to_passage attention, hopefully, for each token, putting a weight on date tokens
        # that are related to it.
        self.passage_to_num_attention: MatrixAttention = BilinearMatrixAttention(
            matrix_1_dim=passage_encoding_dim,
            matrix_2_dim=passage_encoding_dim)

        if dropout > 0:
            self._dropout = torch.nn.Dropout(p=dropout)
        else:
            self._dropout = lambda x: x
Example #10
# Assumed setup for this excerpt (the snippet starts mid-script; sizes are illustrative):
import torch
from allennlp.nn import Activation
from allennlp.modules.attention import LinearAttention
from allennlp.modules.matrix_attention import (BilinearMatrixAttention,
                                                DotProductMatrixAttention,
                                                LinearMatrixAttention)

batch_size = 1
embedding_dim1 = 8
embedding_dim2 = 16
vector = torch.rand((batch_size, embedding_dim1))
matrix = torch.rand((batch_size, 10, embedding_dim2))

tanh = Activation.by_name('tanh')()
attention = LinearAttention(
    tensor_1_dim=embedding_dim1, tensor_2_dim=embedding_dim2,
    combination='x,y', activation=tanh)
output = attention(vector, matrix)
print('Output from LinearAttention:', output)

# MatrixAttention
sequence_length1 = 10
sequence_length2 = 15

# dot product attention requires both matrices to have the same embedding dimension (sequence lengths may differ)
matrix1 = torch.rand((batch_size, sequence_length1, embedding_dim1))
matrix2 = torch.rand((batch_size, sequence_length2, embedding_dim1))

matrix_attention = DotProductMatrixAttention()
output = matrix_attention(matrix1, matrix2)
print('Output shape of DotProductMatrixAttention:', output.shape)

# bilinear & linear attention allow inputs with different embedding dimensions
matrix1 = torch.rand((1, sequence_length1, embedding_dim1))
matrix2 = torch.rand((1, sequence_length2, embedding_dim2))

matrix_attention = BilinearMatrixAttention(
    matrix_1_dim=embedding_dim1, matrix_2_dim=embedding_dim2)
output = matrix_attention(matrix1, matrix2)
print('Output shape of BilinearMatrixAttention:', output.shape)

matrix_attention = LinearMatrixAttention(
    tensor_1_dim=embedding_dim1, tensor_2_dim=embedding_dim2,
    combination='x,y', activation=tanh)
output = matrix_attention(matrix1, matrix2)
print('Output shape of LinearMatrixAttention:', output.shape)
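The matrix attentions above return raw similarity scores of shape (batch_size, sequence_length1, sequence_length2); in a model these are usually normalized into attention distributions and used to build aligned representations. A brief sketch of that step, assuming AllenNLP's masked_softmax and weighted_sum utilities and an illustrative padding mask:

from allennlp.nn.util import masked_softmax, weighted_sum

# pretend the last 5 positions of matrix2 are padding
mask2 = torch.zeros(1, sequence_length2, dtype=torch.bool)
mask2[:, :sequence_length2 - 5] = True

attention_weights = masked_softmax(output, mask2)    # padded columns receive zero weight
aligned = weighted_sum(matrix2, attention_weights)   # (1, sequence_length1, embedding_dim2)
print('Aligned representation shape:', aligned.shape)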
Example #11
    def __init__(
        self,
        vocab: Vocabulary,
        text_field_embedder: TextFieldEmbedder,
        coverage_ff: FeedForward,
        relation_predictor: FeedForward,
        scale_relation_loss: float = 1.0,
        aggregate: str = "max",
        combination: str = "x,y",
        answer_choice_combination: Optional[str] = None,
        coverage_combination: Optional[str] = None,
        var_dropout: float = 0.0,
        use_projection: bool = False,
        ignore_spans: bool = True,
        ignore_relns: bool = False,
        ignore_ann: bool = False,
        span_extractor: Optional[SpanExtractor] = None,
        reln_ff: Optional[FeedForward] = None,
        attention: Optional[MatrixAttention] = None,
        encoder: Optional[Seq2SeqEncoder] = None,
        initializer: InitializerApplicator = InitializerApplicator()
    ) -> None:
        """
        :param vocab: AllenNLP Vocabulary
        :param text_field_embedder: AllenNLP Textfield embedder
        :param coverage_ff: Feedforward network that computes the "Fact-Relevance" score_f i.e. how
        well does the fact "cover" the question + answer
        :param relation_predictor: Feedforward network that predicts the relation label R_j
        :param scale_relation_loss: Scalar used to scale the relation loss term, \lambda
        :param aggregate: Pooling function used to aggregate question/fact vector representations in
         "Relation Prediction Score". Choices: max, avg, last
        :param combination: Combination string used to combine vector representation \bigotimes
        :param answer_choice_combination: If set, use this combination string instead of combination
        for combining the answer-based and choice-based fact representation
        :param coverage_combination: If set, use this combination string instead of combination
        for combining the question-choice-based fact rep and fact rep
        :param var_dropout: Variational dropout probability on the input embeddings
        :param use_projection: If set to true, learn a projector to map relation representations to
        a #rel-dimensional vector. Otherwise, the relation predictor should produce embeddings that
        match the #rels.
        :param ignore_spans: If set to true, don't use span representation of the answers in the
        fact_choice_question_rep (default: true)
        :param ignore_relns: If set to true, don't use the relation labels/scores (no relation
        representations computed or scored)
        :param ignore_ann: If set to true, ignore all auxiliary annotation, i.e. spans and relations.
        Use the entire fact to compute answer span-based representations. No loss computed against
        the relation label. Note that latent relation representations will still be computed
        :param span_extractor: SpanExtractor used to compute answer span representation
        :param reln_ff: Feedforward used to calculate the relation prediction score
        :param attention: Attention function used
        :param encoder: Encoder used to convert seq of word embeddings into contextual (e.g. LSTM)
        representations
        :param initializer: Initializer used for parameters
        """
        super(SpanRelationPredFactAttModel, self).__init__(vocab)
        self._text_field_embedder = text_field_embedder
        self._coverage_ff = coverage_ff
        if attention:
            self._attention = attention
        else:
            self._attention = DotProductMatrixAttention()
        if var_dropout > 0.0:
            self._var_dropout = InputVariationalDropout(var_dropout)
        else:
            self._var_dropout = None

        self._num_relations = vocab.get_vocab_size(namespace="relation_labels")

        self._ignore_spans = ignore_spans
        self._aggregate = aggregate
        self._scale_relation_loss = scale_relation_loss
        if span_extractor is None and not ignore_spans:
            raise ConfigurationError(
                "ignore_spans set to False but no span_extractor provided!")
        self._span_extractor = span_extractor
        self._relation_predictor = relation_predictor
        # simple projector
        if use_projection:
            self._relation_projector = torch.nn.Linear(
                self._relation_predictor.get_output_dim(), self._num_relations)
        else:
            self._relation_projector = None
        self._combination = combination
        if answer_choice_combination:
            self._answer_choice_combination = answer_choice_combination
        else:
            self._answer_choice_combination = combination

        if coverage_combination:
            self._coverage_combination = coverage_combination
        else:
            self._coverage_combination = combination
        self._ignore_ann = ignore_ann
        self._ignore_relns = ignore_relns
        if reln_ff is None and not ignore_relns:
            raise ConfigurationError(
                "ignore_relns set to False but no reln_ff provided!")
        self._reln_ff = reln_ff
        self._encoder = encoder
        self._aggr_label_accuracy = BooleanAccuracy()
        self._aggr_choice_accuracy = CategoricalAccuracy()
        self._relation_loss = torch.nn.BCEWithLogitsLoss()
        self._choice_loss = torch.nn.CrossEntropyLoss()
        initializer(self)
Example #12
def train_valid_base_text_model(model_name):
    """

    :param model_name: the full model name to use
    :return:
    """
    token_indexer = {"tokens": ELMoTokenCharactersIndexer()}

    def tokenizer(x: str):
        return [
            w.text for w in SpacyWordSplitter(language='en_core_web_sm',
                                              pos_tags=False).split_words(x)
        ]

    reader = TextExpDataSetReader(token_indexers=token_indexer,
                                  tokenizer=tokenizer,
                                  add_numeric_data=False)
    train_instances = reader.read(train_data_file_path)
    validation_instances = reader.read(validation_data_file_path)
    vocab = Vocabulary()

    # TODO: change this if necessary
    # batch_size should be 10 or 9, depending on the input,
    # and do not shuffle, so all the data for the same pair lands in the same batch
    iterator = BasicIterator(
        batch_size=batch_size)  # , instances_per_epoch=10)
    #  sorting_keys=[('sequence_review', 'list_num_tokens')])
    iterator.index_with(vocab)

    options_file = 'https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x1024_128_2048cnn_1xhighway/' \
                   'elmo_2x1024_128_2048cnn_1xhighway_options.json'
    weight_file = 'https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x1024_128_2048cnn_1xhighway/' \
                  'elmo_2x1024_128_2048cnn_1xhighway_weights.hdf5'

    # TODO: check the output of this
    # elmo_embedder = Elmo(options_file, weight_file, num_output_representations=2)
    # word_embeddings = elmo_embedder
    elmo_embedder = ElmoTokenEmbedder(options_file, weight_file)
    word_embeddings = BasicTextFieldEmbedder({"tokens": elmo_embedder})
    review_attention_layer = models.AttentionSoftMaxLayer(
        BilinearMatrixAttention(word_embeddings.get_output_dim(),
                                word_embeddings.get_output_dim()))
    seq_attention_layer = models.AttentionSoftMaxLayer(
        DotProductMatrixAttention())

    feed_forward = FeedForward(input_dim=batch_size,
                               num_layers=2,
                               hidden_dims=[batch_size, 1],
                               activations=ReLU(),
                               dropout=[0.2, 0.0])
    fc_review_rep = FeedForward(input_dim=124,
                                num_layers=1,
                                hidden_dims=[10],
                                activations=ReLU())

    criterion = nn.MSELoss()

    metrics_dict = {
        'mean_absolute_error': MeanAbsoluteError(),
    }

    model = models.BasicTextModel(
        word_embedding=word_embeddings,
        review_representation_layer=review_attention_layer,
        seq_representation_layer=seq_attention_layer,
        vocab=vocab,
        criterion=criterion,
        metrics_dict=metrics_dict,
        classifier_feedforward=feed_forward,
        fc_review_rep=fc_review_rep)

    optimizer = optim.Adam(model.parameters(), lr=0.1)
    num_epochs = 2

    run_log_directory = utils.set_folder(
        datetime.now().strftime(
            f'{model_name}_{num_epochs}_epochs_%d_%m_%Y_%H_%M_%S'), 'logs')

    if not os.path.exists(run_log_directory):
        os.makedirs(run_log_directory)

    trainer = Trainer(
        model=model,
        optimizer=optimizer,
        iterator=iterator,
        train_dataset=train_instances,
        validation_dataset=validation_instances,
        num_epochs=num_epochs,
        shuffle=False,
        serialization_dir=run_log_directory,
        patience=10,
        histogram_interval=10,
    )

    model_dict = trainer.train()

    print(f'{model_name}: evaluation measures are:')
    for key, value in model_dict.items():
        print(f'{key}: {value}')
Example #13
def train_valid_base_text_decision_results_ep_model(
        model_name: str,
        single_round_label: bool,
        use_only_prev_round: bool,
        train_data_file_name: str,
        validation_data_file_name: str,
        no_history: bool = False):
    """
    This function trains and validates a model that uses texts and numbers.
    :param model_name: the full model name
    :param single_round_label: the label to use: single round or total payoff
    :param use_only_prev_round: whether to use all the history or only the previous round
    :param train_data_file_name: the name of the train_data to use
    :param validation_data_file_name: the name of the validation_data to use
    :param no_history: if we don't want to use any history data
    :return:
    """
    token_indexer = {"tokens": ELMoTokenCharactersIndexer()}

    def tokenizer(x: str):
        return [
            w.text for w in SpacyWordSplitter(language='en_core_web_sm',
                                              pos_tags=False).split_words(x)
        ]

    reader = TextExpDataSetReader(token_indexers=token_indexer,
                                  tokenizer=tokenizer,
                                  add_numeric_data=True,
                                  use_only_prev_round=use_only_prev_round,
                                  single_round_label=single_round_label,
                                  three_losses=True,
                                  no_history=no_history)
    train_data_file_inner_path = os.path.join(data_directory,
                                              train_data_file_name)
    validation_data_file_inner_path = os.path.join(data_directory,
                                                   validation_data_file_name)
    train_instances = reader.read(train_data_file_inner_path)
    validation_instances = reader.read(validation_data_file_inner_path)
    vocab = Vocabulary()

    # TODO: change this if necessary
    # batch_size should be 10 or 9, depending on the input,
    # and do not shuffle, so all the data for the same pair lands in the same batch
    iterator = BasicIterator(batch_size=9)  # , instances_per_epoch=10)
    #  sorting_keys=[('sequence_review', 'list_num_tokens')])
    iterator.index_with(vocab)

    options_file = 'https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x1024_128_2048cnn_1xhighway/' \
                   'elmo_2x1024_128_2048cnn_1xhighway_options.json'
    weight_file = 'https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x1024_128_2048cnn_1xhighway/' \
                  'elmo_2x1024_128_2048cnn_1xhighway_weights.hdf5'

    # TODO: check the output of this
    # elmo_embedder = Elmo(options_file, weight_file, num_output_representations=2)
    # word_embeddings = elmo_embedder
    elmo_embedder = ElmoTokenEmbedder(options_file, weight_file)
    word_embeddings = BasicTextFieldEmbedder({"tokens": elmo_embedder})
    review_attention_layer = models.AttentionSoftMaxLayer(
        BilinearMatrixAttention(word_embeddings.get_output_dim(),
                                word_embeddings.get_output_dim()))
    seq_attention_layer = models.AttentionSoftMaxLayer(
        DotProductMatrixAttention())
    fc_review_rep_output_dim = reader.max_tokens_len
    fc_review_rep = FeedForward(input_dim=reader.max_tokens_len,
                                num_layers=1,
                                hidden_dims=[fc_review_rep_output_dim],
                                activations=ReLU())
    # seq_attention_layer = FeedForward(input_dim=)

    # numbers_lstm: Seq2VecEncoder = PytorchSeq2VecWrapper(nn.LSTM(2, 10, bidirectional=True, batch_first=True))
    # the shape of the flatten data rep
    feed_forward_input_dim = reader.max_seq_len * (fc_review_rep_output_dim +
                                                   reader.number_length)
    feed_forward_classification = FeedForward(input_dim=feed_forward_input_dim,
                                              num_layers=1,
                                              hidden_dims=[2],
                                              activations=ReLU(),
                                              dropout=[0.0])
    feed_forward_regression = FeedForward(input_dim=feed_forward_input_dim,
                                          num_layers=1,
                                          hidden_dims=[1],
                                          activations=ReLU(),
                                          dropout=[0.0])
    criterion_classification = nn.BCEWithLogitsLoss()
    criterion_regression = nn.MSELoss()

    metrics_dict = {
        "accuracy": CategoricalAccuracy(),
        # 'auc': Auc(),
        # 'F1measure': F1Measure(positive_label=1),
    }

    model = models.BasicTextDecisionResultModel(
        word_embedding=word_embeddings,
        review_representation_layer=review_attention_layer,
        seq_representation_layer=seq_attention_layer,
        vocab=vocab,
        classifier_feedforward_classification=feed_forward_classification,
        classifier_feedforward_regression=feed_forward_regression,
        fc_review_rep=fc_review_rep,
        criterion_classification=criterion_classification,
        criterion_regression=criterion_regression,
        metrics_dict=metrics_dict,
        add_numbers=True,
        max_tokens_len=reader.max_tokens_len,
    )

    optimizer = optim.Adam(model.parameters(), lr=0.1)
    num_epochs = 2

    run_log_directory = utils.set_folder(
        datetime.now().strftime(
            f'{model_name}_{num_epochs}_epochs_%d_%m_%Y_%H_%M_%S'), 'logs')

    trainer = Trainer(
        model=model,
        optimizer=optimizer,
        iterator=iterator,
        train_dataset=train_instances,
        validation_dataset=validation_instances,
        num_epochs=num_epochs,
        shuffle=False,
        serialization_dir=run_log_directory,
        patience=10,
        histogram_interval=10,
    )

    model_dict = trainer.train()

    print(f'{model_name}: evaluation measures are:')
    for key, value in model_dict.items():
        if 'accuracy' in key:
            value = value * 100
        print(f'{key}: {value}')

    # save the model predictions
    model.predictions.to_csv(os.path.join(run_log_directory,
                                          'predictions.csv'))
Example #14
    def __init__(self,
                 question_encoding_dim: int,
                 passage_encoding_dim: int,
                 passage_attention_to_span: Seq2SeqEncoder,
                 question_attention_to_span: Seq2SeqEncoder,
                 passage_attention_to_count: Seq2SeqEncoder,
                 passage_count_predictor=None,
                 passage_count_hidden2logits=None,
                 dropout: float = 0.0):
        super().__init__()

        self.num_counts = 10

        self.passage_attention_scalingvals = [1, 2, 5, 10]

        self.passage_attention_to_span = passage_attention_to_span
        self.passage_startend_predictor = torch.nn.Linear(
            self.passage_attention_to_span.get_output_dim(), 2)

        self.question_attention_to_span = question_attention_to_span
        self.question_startend_predictor = torch.nn.Linear(
            self.question_attention_to_span.get_output_dim(), 2)

        self.passage_attention_to_count = passage_attention_to_count
        # self.passage_count_predictor = torch.nn.Linear(self.passage_attention_to_count.get_output_dim(),
        #                                                self.num_counts)
        self.passage_count_predictor = passage_count_predictor
        # Linear from self.passage_attention_to_count.output_dim --> 1
        self.passage_count_hidden2logits = passage_count_hidden2logits

        self.dotprod_matrix_attn = DotProductMatrixAttention()

        self.filter_matrix_attention = LinearMatrixAttention(
            tensor_1_dim=question_encoding_dim,
            tensor_2_dim=passage_encoding_dim,
            combination="x,y,x*y")

        # We will sum the passage-token-repr to the weighted-q-repr - to use x*y combination
        self.relocate_matrix_attention = LinearMatrixAttention(
            tensor_1_dim=passage_encoding_dim,
            tensor_2_dim=passage_encoding_dim,
            combination="x,y,x*y")

        # This computes a passage_to_passage attention, hopefully, for each token, putting a weight on date tokens
        # that are related to it.
        self.passage_to_date_attention: MatrixAttention = BilinearMatrixAttention(
            matrix_1_dim=passage_encoding_dim,
            matrix_2_dim=passage_encoding_dim)

        self.passage_to_start_date_attention: MatrixAttention = BilinearMatrixAttention(
            matrix_1_dim=passage_encoding_dim,
            matrix_2_dim=passage_encoding_dim)

        self.passage_to_end_date_attention: MatrixAttention = BilinearMatrixAttention(
            matrix_1_dim=passage_encoding_dim,
            matrix_2_dim=passage_encoding_dim)

        # This computes a passage_to_passage attention, hopefully, for each token, putting a weight on date tokens
        # that are related to it.
        self.passage_to_num_attention: MatrixAttention = BilinearMatrixAttention(
            matrix_1_dim=passage_encoding_dim,
            matrix_2_dim=passage_encoding_dim)

        if dropout > 0:
            self._dropout = torch.nn.Dropout(p=dropout)
        else:
            self._dropout = lambda x: x
Example #15
    def __init__(self, vocab: Vocabulary,
                 text_encoder: Seq2SeqEncoder,
                 word_embedder: TextFieldEmbedder,
                 enable_training_log: bool = False,
                 inp_drop_rate: float = 0.2,
                 out_drop_rate: float = 0.2,
                 loss_weights: List = (0.2, 0.4, 0.4),
                 super_mode: str = 'before',
                 backbone: str = 'unet',
                 unet_down_channel: int = 256,
                 feature_sel: int = 127):
        super(UnifiedFollowUp, self).__init__(vocab)
        self.text_encoder = text_encoder
        self.word_embedder = word_embedder

        """
        Define model arch choices
        """
        self.backbone = backbone

        # input dropout
        if inp_drop_rate > 0:
            self.var_inp_dropout = InputVariationalDropout(p=inp_drop_rate)
        else:
            self.var_inp_dropout = lambda x: x
        # output dropout
        if out_drop_rate > 0:
            self.var_out_dropout = InputVariationalDropout(p=out_drop_rate)
        else:
            self.var_out_dropout = lambda x: x

        self.hidden_size = text_encoder.get_output_dim() // 2 if text_encoder.is_bidirectional() \
            else text_encoder.get_output_dim()

        self.output_size = text_encoder.get_output_dim()

        # ele -> element wise multiply
        # dot -> dot product
        # cos -> cosine similarity
        # emb_dot -> embedding dot product
        # emb_cos -> embedding cosine similarity
        # linear -> linear similarity
        # bilinear -> bilinear similarity

        feature_sel = feature_sel
        sel_arr = "{0:07b}".format(int(feature_sel))
        nni_choices = ['ele', 'dot', 'cos', 'emb_dot', 'emb_cos', 'linear', 'bilinear']

        self.segment_choices = [nni_choices[i] for i in range(7) if sel_arr[i] == '1']
        # if expand bi-direction, we will regard forward/backward as two channels
        self.expand_bidir = False

        self.similar_function = ModuleDict({
            'ele': ElementWiseMatrixAttention(),
            'dot': DotProductMatrixAttention(),
            'cos': CosineMatrixAttention(),
            'emb_dot': DotProductMatrixAttention(),
            'emb_cos': CosineMatrixAttention(),
            'bilinear': BilinearMatrixAttention(matrix_1_dim=self.output_size, matrix_2_dim=self.output_size),
            'linear': LinearMatrixAttention(tensor_1_dim=self.output_size, tensor_2_dim=self.output_size)
        })

        self.attn_channel = 0
        for choice in self.segment_choices:
            if choice == 'ele':
                self.attn_channel += self.output_size
            elif choice in ['dot', 'cos', 'emb_dot', 'emb_cos', 'bilinear', 'linear']:
                if self.expand_bidir:
                    self.attn_channel += 2
                else:
                    self.attn_channel += 1

        self.class_mapping: Dict[str, int] = get_class_mapping(super_mode=super_mode)

        # Here we have two choices now, one is MLP, and another is UNet
        if self.backbone == 'unet':
            self.segmentation_net = AttentionUNet(input_channels=self.attn_channel,
                                                  class_number=len(self.class_mapping.keys()),
                                                  down_channel=unet_down_channel)
        else:
            raise Exception("Currently we do not support for other arches.")

        class_zero_weight = loss_weights[0]
        class_one_weight = loss_weights[1]

        self.register_buffer('weight_tensor', torch.tensor([class_zero_weight, class_one_weight,
                                                            1 - class_zero_weight - class_one_weight]))
        self.loss = nn.CrossEntropyLoss(ignore_index=-1,
                                        weight=self.weight_tensor)

        # initialize metrics measurement
        self.metrics = {'ROUGE': BatchAverage(),
                        '_ROUGE1': BatchAverage(),
                        '_ROUGE2': BatchAverage(),
                        # TODO: You can speed up the code by disabling BLEU, since
                        #  the corpus-based BLEU metric is quite time-consuming.
                        'BLEU': CorpusBLEUMetric(),
                        'EM': BatchAverage(),
                        'F1': FScoreMetric(prefix="1"),
                        'F2': FScoreMetric(prefix="2"),
                        'F3': FScoreMetric(prefix="3")}

        parameter_num = count_parameters(self)
        print(parameter_num)

        self.min_width = 8
        self.min_height = 8
        self.enable_training_log = enable_training_log
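The feature_sel integer above works as a 7-bit mask over the similarity-function choices: it is formatted as a zero-padded binary string and every '1' keeps the corresponding entry of nni_choices (so the default 127 enables all seven). A small standalone illustration of just that selection logic, with illustrative mask values:

nni_choices = ['ele', 'dot', 'cos', 'emb_dot', 'emb_cos', 'linear', 'bilinear']

for feature_sel in (127, 3, 84):
    sel_arr = "{0:07b}".format(int(feature_sel))
    choices = [nni_choices[i] for i in range(7) if sel_arr[i] == '1']
    print(feature_sel, sel_arr, choices)

# 127 -> '1111111' -> all seven similarity functions
# 3   -> '0000011' -> ['linear', 'bilinear']
# 84  -> '1010100' -> ['ele', 'cos', 'emb_cos']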
Example #16
    def __init__(
            self,
            vocab: Vocabulary,
            span_encoder: Seq2SeqEncoder,
            reasoning_encoder: Seq2SeqEncoder,
            input_dropout: float = 0.1,
            hidden_dim_maxpool: int = 512,
            class_embs: bool = True,
            reasoning_use_obj: bool = True,
            reasoning_use_answer: bool = True,
            reasoning_use_question: bool = True,
            pool_reasoning: bool = True,
            pool_answer: bool = True,
            pool_question: bool = False,
            preload_path: str = "source_model.th",
            initializer: InitializerApplicator = InitializerApplicator(),
    ):
        super(AttentionQA, self).__init__(vocab)

        self.detector = SimpleDetector(pretrained=True,
                                       average_pool=True,
                                       semantic=class_embs,
                                       final_dim=512)
        ###################################################################################################

        self.rnn_input_dropout = TimeDistributed(
            InputVariationalDropout(
                input_dropout)) if input_dropout > 0 else None

        self.span_encoder = TimeDistributed(span_encoder)
        self.reasoning_encoder = TimeDistributed(reasoning_encoder)
        self.BiLSTM = TimeDistributed(MYLSTM(1280, 512, 256))
        self.source_encoder = TimeDistributed(source_LSTM(768, 256))

        self.span_attention = BilinearMatrixAttention(
            matrix_1_dim=span_encoder.get_output_dim(),
            matrix_2_dim=span_encoder.get_output_dim(),
        )
        self.span_attention_2 = BilinearMatrixAttention(
            matrix_1_dim=span_encoder.get_output_dim(),
            matrix_2_dim=span_encoder.get_output_dim(),
        )

        self.obj_attention = BilinearMatrixAttention(
            matrix_1_dim=span_encoder.get_output_dim(),
            matrix_2_dim=self.detector.final_dim,
        )

        self.obj_attention_2 = BilinearMatrixAttention(
            matrix_1_dim=span_encoder.get_output_dim(),
            matrix_2_dim=self.detector.final_dim,
        )

        self._matrix_attention = DotProductMatrixAttention()
        #self._matrix_attention = MatrixAttention(similarity_function)

        self.reasoning_use_obj = reasoning_use_obj
        self.reasoning_use_answer = reasoning_use_answer
        self.reasoning_use_question = reasoning_use_question
        self.pool_reasoning = pool_reasoning
        self.pool_answer = pool_answer
        self.pool_question = pool_question
        dim = sum([d for d, to_pool in [
            (reasoning_encoder.get_output_dim(), self.pool_reasoning),
            (span_encoder.get_output_dim(), self.pool_answer),
            (span_encoder.get_output_dim(), self.pool_question),
        ] if to_pool])

        self.final_mlp = torch.nn.Sequential(
            torch.nn.Dropout(input_dropout, inplace=False),
            torch.nn.Linear(dim, hidden_dim_maxpool),
            torch.nn.ReLU(inplace=True),
            torch.nn.Dropout(input_dropout, inplace=False),
            torch.nn.Linear(hidden_dim_maxpool, 1),
        )
        self.final_mlp_2 = torch.nn.Sequential(
            torch.nn.Dropout(input_dropout, inplace=False),
            torch.nn.Linear(dim, hidden_dim_maxpool),
            torch.nn.ReLU(inplace=True),
            torch.nn.Dropout(input_dropout, inplace=False),
            torch.nn.Linear(hidden_dim_maxpool, 1),
        )

        self.answer_BN = torch.nn.Sequential(BatchNorm1d(512))
        self.question_BN = torch.nn.Sequential(BatchNorm1d(512))
        self.source_answer_BN = torch.nn.Sequential(BatchNorm1d(512))
        self.source_question_BN = torch.nn.Sequential(BatchNorm1d(512))
        self.image_BN = BatchNorm1d(512)
        self.final_BN = torch.nn.Sequential(BatchNorm1d(512))
        self.final_mlp_linear = torch.nn.Sequential(torch.nn.Linear(512, 1))
        self.final_mlp_pool = torch.nn.Sequential(
            torch.nn.Linear(2560, 512),
            torch.nn.ReLU(inplace=True),
            torch.nn.Dropout(input_dropout, inplace=False),
        )

        self._accuracy = CategoricalAccuracy()
        self._loss = torch.nn.CrossEntropyLoss()
        initializer(self)

        if preload_path is not None:
            logger.info("Preloading!")
            preload = torch.load(preload_path)
            own_state = self.state_dict()
            for name, param in preload.items():
                #if name[0:8] == "_encoder":
                #    suffix = "._module."+name[9:]
                #    logger.info("preload paramter {}".format("span_encoder"+suffix))
                #    own_state["span_encoder"+suffix].copy_(param)
                # newly introduced source_encoder
                if name[0:4] == "LSTM":
                    suffix = "._module" + name[4:]
                    logger.info("preload paramter {}".format("source_encoder" +
                                                             suffix))
                    own_state["source_encoder" + suffix].copy_(param)