def _alignment_module(op, hidden_size):
    module = _utils.get_module(
        AlignmentNetwork, op, hidden_size=hidden_size, required=True
    )
    module.expect_signature("[AxBxC, AxDxC] -> [AxBxD]")
    return module

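# Note on the signature strings (an interpretation, based on how they are used
# throughout this file): `expect_signature` appears to treat each letter as a
# dimension variable, so "[AxBxC, AxDxC] -> [AxBxD]" reads as "given two 3-d
# inputs that agree on dims A and C, produce a 3-d output of shape A x B x D".
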
def _create(cls, arg, **kwargs): r"""Create a word aggregator object. Args: arg (str or :mod:`deepmatcher.word_aggregators` or callable): Same as the `word_aggregator` argument to the constructor of :class:`AttrSummarizer`. **kwargs: Keyword arguments to the constructor of the WordAggregator sub-class. For details on what these can be, please refer to the documentation of the sub-classes in :mod:`deepmatcher.word_aggregators`. """ assert arg is not None if isinstance(arg, six.string_types): parts = arg.split("-") if parts[-1] == "pool" and dm.word_aggregators.Pool.supports_style( "-".join(parts[:-1]) ): seq = [] seq.append(dm.modules.Lambda(lambda x1, x2: x1)) # Ignore the context. seq.append(dm.word_aggregators.Pool(style="-".join(parts[:-1]))) # Make lazy module. wa = LazyModuleFn(lambda: dm.modules.MultiSequential(*seq)) elif arg == "attention-with-rnn": wa = dm.word_aggregators.AttentionWithRNN(**kwargs) else: raise ValueError("Unknown Word Aggregator name.") else: wa = _utils.get_module(WordAggregator, arg) wa.expect_signature("[AxBxC] -> [AxD]") return wa
def _create(cls, arg, **kwargs): r"""Create a word comparator object. Args: arg (str or :mod:`deepmatcher.word_comparators` or callable): Same as the `word_comparator` argument to the constructor of :class:`AttrSummarizer`. **kwargs: Keyword arguments to the constructor of the WordComparator sub-class. For details on what these can be, please refer to the documentation of the sub-classes in :mod:`deepmatcher.word_comparators`. """ if isinstance(arg, six.string_types): parts = arg.split("-") if parts[1] == "attention" and dm.modules.AlignmentNetwork.supports_style( parts[0] ): wc = dm.word_comparators.Attention(alignment_network=parts[0], **kwargs) else: raise ValueError("Unknown Word Comparator name.") else: wc = _utils.get_module(WordComparator, arg) if wc is not None: wc.expect_signature("[AxBxC, AxDxC, AxBxE, AxDxE] -> [AxBxF]") return wc
def _create(cls, arg, **kwargs): r"""Create a word contextualizer object. Args: arg (str or :mod:`deepmatcher.word_contextualizers` or callable): Same as the `word_contextualizer` argument to the constructor of :class:`AttrSummarizer`. **kwargs: Keyword arguments to the constructor of the WordContextualizer sub-class. For details on what these can be, please refer to the documentation of the sub-classes in :mod:`deepmatcher.word_contextualizers`. """ if isinstance(arg, six.string_types): if dm.word_contextualizers.RNN.supports_style(arg): wc = dm.word_contextualizers.RNN(arg, **kwargs) elif arg == "self-attention": wc = dm.word_contextualizers.SelfAttention(**kwargs) else: raise ValueError("Unknown Word Contextualizer name.") else: wc = _utils.get_module(WordContextualizer, arg) if wc is not None: wc.expect_signature("[AxBxC] -> [AxBxD]") return wc
def _create(cls, arg, **kwargs): r"""Create an attribute summarization object. Args: arg (str or :mod:`deepmatcher.attr_summarizers` or callable): Same as the `attr_summarizer` argument to the constructor of :class:`MatchingModel`. **kwargs: Keyword arguments to the constructor of the AttrSummarizer sub-class. For details on what these can be, please refer to the documentation of the sub-classes in :mod:`deepmatcher.attr_summarizers`. """ assert arg is not None if isinstance(arg, six.string_types): type_map = { "sif": dm.attr_summarizers.SIF, "rnn": dm.attr_summarizers.RNN, "attention": dm.attr_summarizers.Attention, "hybrid": dm.attr_summarizers.Hybrid, } if arg in type_map: asr = type_map[arg](**kwargs) else: raise ValueError("Unknown Attribute Summarizer name.") else: asr = _utils.get_module(AttrSummarizer, arg) asr.expect_signature("[AxBxC, AxDxC] -> [AxE, AxE]") return asr
def _transform_module(op, hidden_size, output_size=None):
    output_size = output_size or hidden_size
    module = _utils.get_module(
        Transform, op, hidden_size=hidden_size, output_size=output_size
    )
    if module:
        module.expect_signature("[AxB] -> [AxC]")
        module.expect_signature("[AxBxC] -> [AxBxD]")
    return module

def _init(
    self,
    hidden_size=None,
    input_dropout=0,
    rnn="gru",
    rnn_pool_style="birnn-last",
    score_dropout=0,
    input_context_comparison_network="1-layer-highway",
    value_transform_network=None,
    transform_dropout=0,
    input_size=None,
):
    assert rnn is not None
    self.rnn = _utils.get_module(dm.modules.RNN, rnn, hidden_size=hidden_size)
    self.rnn.expect_signature("[AxBxC] -> [AxBx{D}]".format(D=hidden_size))

    self.rnn_pool = ModulePool(rnn_pool_style)

    self.input_context_comparison_network = dm.modules._transform_module(
        input_context_comparison_network, hidden_size=hidden_size
    )
    self.scoring_network = dm.modules._transform_module("1-layer", hidden_size=1)
    self.value_transform_network = dm.modules._transform_module(
        value_transform_network, hidden_size=hidden_size
    )

    self.input_dropout = nn.Dropout(input_dropout)
    self.transform_dropout = nn.Dropout(transform_dropout)
    self.score_dropout = nn.Dropout(score_dropout)

    self.softmax = nn.Softmax(dim=1)

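# Construction sketch (hypothetical parameter values; assumes "lstm" is a
# supported RNN style): this module is normally built through the
# word-aggregator factory above, e.g.
#
#   wa = dm.word_aggregators.AttentionWithRNN(hidden_size=300, rnn="lstm")
#
# which, per `_init` above, wires an RNN, a "birnn-last" pool, a highway
# comparison network, and a 1-layer scoring network into one aggregator.
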
def _bypass_module(op):
    module = _utils.get_module(Bypass, op)
    if module:
        module.expect_signature("[AxB, AxC] -> [AxB]")
    return module

def _merge_module(op):
    module = _utils.get_module(Merge, op)
    if module:
        module.expect_signature("[AxB, AxB] -> [AxC]")
    return module

def initialize(self, train_dataset, init_batch=None):
    """Initialize (not lazily) the matching model given the actual training data.

    Instantiates all sub-components and their trainable parameters.

    Args:
        train_dataset (:class:`~deepmatcher.data.MatchingDataset`): The training
            dataset obtained using :func:`deepmatcher.data.process`.
        init_batch (:class:`~deepmatcher.batch.MatchingBatch`): A batch of data to
            forward propagate through the model. If None, a batch is drawn from the
            training dataset.
    """
    if self._initialized:
        return

    # Copy over training info from the train set for persistent state, but remove
    # the actual data examples.
    self.meta = Bunch(**train_dataset.__dict__)
    if hasattr(self.meta, "fields"):
        del self.meta.fields
        del self.meta.examples

    self._register_train_buffer("state_meta", Bunch(**self.meta.__dict__))
    del self.state_meta.metadata  # We only need `self.meta.orig_metadata` for state.

    self.attr_summarizers = dm.modules.ModuleMap()
    if isinstance(self.attr_summarizer, Mapping):
        for name, summarizer in self.attr_summarizer.items():
            self.attr_summarizers[name] = AttrSummarizer._create(
                summarizer, hidden_size=self.hidden_size
            )
        assert (
            len(
                set(self.attr_summarizers.keys())
                ^ set(self.meta.canonical_text_fields)
            )
            == 0
        )
    else:
        self.attr_summarizer = AttrSummarizer._create(
            self.attr_summarizer, hidden_size=self.hidden_size
        )
        for name in self.meta.canonical_text_fields:
            self.attr_summarizers[name] = copy.deepcopy(self.attr_summarizer)

    if self.attr_condense_factor == "auto":
        self.attr_condense_factor = min(len(self.meta.canonical_text_fields), 6)
        if self.attr_condense_factor == 1:
            self.attr_condense_factor = None

    if not self.attr_condense_factor:
        self.attr_condensors = None
    else:
        self.attr_condensors = dm.modules.ModuleMap()
        for name in self.meta.canonical_text_fields:
            self.attr_condensors[name] = dm.modules.Transform(
                "1-layer-highway",
                non_linearity=None,
                output_size=self.hidden_size // self.attr_condense_factor,
            )

    self.attr_comparators = dm.modules.ModuleMap()
    if isinstance(self.attr_comparator, Mapping):
        for name, comparator in self.attr_comparator.items():
            self.attr_comparators[name] = _create_attr_comparator(comparator)
        assert (
            len(
                set(self.attr_comparators.keys())
                ^ set(self.meta.canonical_text_fields)
            )
            == 0
        )
    else:
        if isinstance(self.attr_summarizer, AttrSummarizer):
            self.attr_comparator = self._get_attr_comparator(
                self.attr_comparator, self.attr_summarizer
            )
        elif self.attr_comparator is None:
            raise ValueError(
                '"attr_comparator" must be specified if "attr_summarizer" is custom.'
            )
        self.attr_comparator = _create_attr_comparator(self.attr_comparator)
        for name in self.meta.canonical_text_fields:
            self.attr_comparators[name] = copy.deepcopy(self.attr_comparator)

    self.attr_merge = dm.modules._merge_module(self.attr_merge)
    self.classifier = _utils.get_module(
        Classifier, self.classifier, hidden_size=self.hidden_size
    )

    self._reset_embeddings(train_dataset.vocabs)

    # Instantiate all components using a small batch from the training set.
    if not init_batch:
        run_iter = MatchingIterator(
            train_dataset,
            train_dataset,
            train=False,
            batch_size=4,
            device=-1,
            sort_in_buckets=False,
        )
        init_batch = next(iter(run_iter))
    self.forward(init_batch)

    # Keep this init_batch for future initializations.
    self.state_meta.init_batch = init_batch

    self._initialized = True
    logger.info(
        "Successfully initialized MatchingModel with {:d} trainable "
        "parameters.".format(tally_parameters(self))
    )

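# End-to-end sketch (hypothetical; the data paths and file names are
# placeholders, and `initialize` is normally invoked implicitly by training):
#
#   import deepmatcher as dm
#   train, validation, test = dm.data.process(
#       path="sample_data", train="train.csv", validation="valid.csv",
#       test="test.csv")
#   model = dm.MatchingModel(attr_summarizer="hybrid")
#   model.initialize(train)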