def __init__(self, vocab, config, logger, random):
    super().__init__(config, random)
    self.config = config
    self.logger = logger
    self.encoder = vocab.encoder()
    self.simmat = modules.InteractionMatrix()
    channels = self.encoder.emb_views()
    # One convolution + k-max pooling block per n-gram size in [mingram, maxgram].
    self.ngrams = nn.ModuleList()
    for ng in range(config['mingram'], config['maxgram'] + 1):
        self.ngrams.append(ConvMax2d(ng, config['nfilters'], k=config['kmax'], channels=channels))
    # Combine the pooled n-gram signals (optionally with an IDF feature per query term).
    if config['combine'] == 'dense32':
        num_feats = len(self.ngrams) * config['kmax'] + (1 if config['idf'] else 0)
        self.combination = DenseCombination([config['qlen'] * num_feats, 32, 32, 1], config['shuf'])
    elif config['combine'] == 'sum':
        self.combination = SumCombination(len(self.ngrams) * config['kmax'] + (1 if config['idf'] else 0))
    elif config['combine'] == 'sumnorm':
        self.combination = SumCombination(len(self.ngrams) * config['kmax'] + (1 if config['idf'] else 0), False)
    else:
        raise ValueError('unknown combine `{combine}`'.format(**config))
    self.path = util.path_model(self)
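# Illustrative sketch (not the repo's ConvMax2d): the operation PACRR-style models typically
# perform per n-gram size is an ng x ng convolution over the interaction matrix followed by
# k-max pooling along the document axis. The class name `NGramConvKMax` and the exact shapes
# below are assumptions for illustration only.
import torch
import torch.nn as nn
import torch.nn.functional as F

class NGramConvKMax(nn.Module):
    def __init__(self, ng, nfilters, k, channels):
        super().__init__()
        self.conv = nn.Conv2d(channels, nfilters, ng, padding=ng // 2)
        self.k = k

    def forward(self, simmat):
        # simmat: [batch, channels, qlen, dlen] similarity matrix
        x = F.relu(self.conv(simmat))          # [batch, nfilters, qlen', dlen']
        x = x.max(dim=1).values                # strongest filter response per position
        return x.topk(self.k, dim=-1).values   # k strongest document positions per query term

# Per query term this yields kmax values per n-gram size, which lines up with the
# `len(self.ngrams) * config['kmax']` feature count used by the combination layers above.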
def __init__(self, vocab, config, logger, random):
    super().__init__(config, random)
    self.logger = logger
    self.encoder = vocab.encoder()
    self.simmat = modules.InteractionMatrix()
    # Bank of RBF kernels applied to the similarity matrix (KNRM-style kernel pooling).
    self.kernels = modules.RbfKernelBank.from_strs(config['mus'], config['sigmas'], dim=1,
                                                   requires_grad=config['grad_kernels'])
    # Linear layer mapping the pooled kernel features (one set per embedding view) to a score.
    self.combine = nn.Linear(self.kernels.count() * self.encoder.emb_views(), 1)
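# Illustrative, self-contained sketch (not the repo's modules.RbfKernelBank): KNRM-style kernel
# pooling maps a query x document similarity matrix to a fixed-size feature vector by applying a
# bank of RBF kernels, summing over document terms, taking a log, and summing over query terms.
# The function name `rbf_kernel_pool` and the tensor shapes are assumptions.
import torch

def rbf_kernel_pool(simmat, mus, sigmas):
    # simmat: [batch, qlen, dlen] cosine similarities; mus, sigmas: [nkernels]
    sim = simmat.unsqueeze(-1)                                  # [batch, qlen, dlen, 1]
    k = torch.exp(-0.5 * (sim - mus) ** 2 / sigmas ** 2)        # [batch, qlen, dlen, nkernels]
    soft_tf = k.sum(dim=2)                                      # soft match counts per query term
    return torch.log(soft_tf.clamp(min=1e-10)).sum(dim=1)       # [batch, nkernels]

# A linear layer like `self.combine` above (kernel count times embedding views -> 1)
# would then map these pooled features to a relevance score.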
def __init__(self, vocab, config, logger, random):
    super().__init__(config, random)
    self.logger = logger
    self.encoder = vocab.encoder()
    self.simmat = modules.InteractionMatrix()
    # A (1 x dlen) convolution collapses the document dimension of the interaction matrix,
    # leaving nfilters features per query term.
    self.conv = nn.Conv2d(1, config['nfilters'], (1, config['dlen']))
    # Feed-forward scoring head over the flattened per-query-term features.
    self.combine1 = nn.Linear(config['qlen'] * config['nfilters'], 300)
    self.combine2 = nn.Linear(300, 300)
    self.dropout = nn.Dropout(0.2)
    self.combine3 = nn.Linear(300, 1)
def __init__(self, vocab, config, logger, random):
    super().__init__(config, random)
    self.config = config
    self.logger = logger
    self.encoder = vocab.encoder()
    self.simmat = modules.InteractionMatrix()
    # Convolution and max-pooling over the (query x document) interaction matrix,
    # with one input channel per embedding view.
    self.conv = nn.Conv2d(self.encoder.emb_views(), config['nfilters'],
                          (config['conv_q'], config['conv_d']))
    self.pool = nn.MaxPool2d((config['pool_q'], config['pool_d']))
    if config['combine'] == 'dense128':
        # Dense scoring head over the flattened pooled feature map.
        self.combine1 = nn.Linear(
            (config['qlen'] // config['pool_q']) * (config['dlen'] // config['pool_d'] - 1) * config['nfilters'],
            128)
        self.combine2 = nn.Linear(128, 1)
def initialize(self, random):
    super().initialize(random)
    self.encoder = self.vocab.encoder()
    if not self.encoder.static():
        self.logger.warn(
            'In most cases, using vocab.train=True will not have an effect on DRMM '
            'because the histogram is not differentiable. An exception might be if '
            'the gradient is propagated back by another means, e.g. the BERT [CLS] token.')
    self.simmat = modules.InteractionMatrix()
    # Matching histogram over the similarity matrix (count, normalized, or log-count).
    self.histogram = {
        'count': CountHistogram,
        'norm': NormalizedHistogram,
        'logcount': LogCountHistogram,
    }[self.histType](self.nbins)
    channels = self.encoder.emb_views()
    # Per-query-term feed-forward network over the histogram features.
    self.hidden_1 = nn.Linear(self.nbins * channels, self.hidden)
    self.hidden_2 = nn.Linear(self.hidden, 1)
    # Combine per-term scores, either IDF-weighted or by summation.
    self.combine = {'idf': IdfCombination, 'sum': SumCombination}[self.combine]()
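# Illustrative sketch of the non-differentiable binning the warning above refers to (not the
# repo's CountHistogram): a DRMM-style count histogram buckets each query term's similarity
# scores into fixed bins, so no gradient reaches the embeddings. The helper name
# `count_histogram` and the equal-width bin layout are assumptions.
import torch

def count_histogram(simmat, nbins):
    # simmat: [batch, qlen, dlen] cosine similarities in [-1, 1]
    bins = ((simmat + 1.0) / 2.0 * (nbins - 1)).round().long().clamp(0, nbins - 1)
    hist = torch.zeros(*simmat.shape[:2], nbins, device=simmat.device)
    return hist.scatter_add_(-1, bins, torch.ones_like(simmat))  # [batch, qlen, nbins]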
def __init__(self, vocab, config, logger, random):
    super().__init__(config, random)
    self.logger = logger
    self.embed = vocab.encoder()
    self.simmat = modules.InteractionMatrix()
    # One 1D convolution per n-gram size, building n-gram representations of query and document.
    self.padding, self.convs = nn.ModuleList(), nn.ModuleList()
    for conv_size in range(1, config['max_ngram'] + 1):
        if conv_size > 1:
            # Right-pad so every n-gram size yields a sequence of the original length.
            self.padding.append(nn.ConstantPad1d((0, conv_size - 1), 0))
        else:
            self.padding.append(nn.Sequential())  # identity
        self.convs.append(nn.ModuleList())
        if self.combine_channels:
            # A single convolution over the concatenated embedding views.
            self.convs[-1].append(nn.Conv1d(self.embed.dim() * self.embed.emb_views(),
                                            config['conv_filters'], conv_size))
        else:
            # One convolution per embedding view.
            for _ in range(self.embed.emb_views()):
                self.convs[-1].append(nn.Conv1d(self.embed.dim(), config['conv_filters'], conv_size))
    if self.pretrained_kernels:
        # Initialize the convolution weights from pre-trained kernels provided by the word-vector source.
        kernels = wordvec_vocab._SOURCES[vocab.config['source']][vocab.config['variant']](logger, get_kernels=True)
        for conv, weight, bias in zip(self.convs, *kernels):
            conv[0].weight.data = torch.from_numpy(weight).float()
            conv[0].bias.data = torch.from_numpy(bias).float()
    # RBF kernel bank for kernel pooling over each (query n-gram, document n-gram) similarity matrix.
    self.kernels = modules.RbfKernelBank.from_strs(config['mus'], config['sigmas'], dim=1,
                                                   requires_grad=config['grad_kernels'])
    # With crossmatch, every query n-gram size is matched against every document n-gram size.
    channels = config['max_ngram'] ** 2 if config['crossmatch'] else config['max_ngram']
    if not self.combine_channels:
        channels *= self.embed.emb_views() ** 2 if config['crossmatch'] else self.embed.emb_views()
    self.combine = nn.Linear(self.kernels.count() * channels, 1)
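# Worked example (illustrative) of the channel arithmetic above, assuming max_ngram=3,
# crossmatch=True, combine_channels=False, a single embedding view, and 11 kernels:
_max_ngram, _emb_views, _nkernels = 3, 1, 11
_channels = _max_ngram ** 2 * _emb_views ** 2   # 3 query n-gram sizes x 3 document n-gram sizes
assert _channels * _nkernels == 99              # input features of the final nn.Linear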
def __init__(self, vocab, config, logger, random):
    super().__init__(config, random)
    self.logger = logger
    self.encoder = vocab.encoder()
    if not self.encoder.static():
        logger.warn(
            'In most cases, using vocab.train=True will not have an effect on DRMM '
            'because the histogram is not differentiable. An exception might be if '
            'the gradient is propagated back by another means, e.g. the BERT [CLS] token.')
    self.simmat = modules.InteractionMatrix()
    # Matching histogram over the similarity matrix (count, normalized, or log-count).
    self.histogram = {
        'count': CountHistogram,
        'norm': NormalizedHistogram,
        'logcount': LogCountHistogram,
    }[self.config['histo']](config['nbins'])
    channels = self.encoder.emb_views()
    # Per-query-term feed-forward network over the histogram features.
    self.hidden_1 = nn.Linear(config['nbins'] * channels, config['hidden'])
    self.hidden_2 = nn.Linear(config['hidden'], 1)
    # Combine per-term scores, either IDF-weighted or by summation.
    self.combine = {'idf': IdfCombination, 'sum': SumCombination}[config['combine']]()
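# Illustrative sketch of an IDF-weighted score combination of the kind IdfCombination suggests
# (not the repo's implementation): DRMM's term-gating network weights each query term's score
# by a softmax over query-term IDFs. The helper name `idf_combine` and the tensor shapes are
# assumptions.
import torch

def idf_combine(term_scores, query_idf):
    # term_scores: [batch, qlen] per-term scores (e.g. the output of hidden_2, squeezed)
    # query_idf:   [batch, qlen] IDF of each query term
    gate = torch.softmax(query_idf, dim=-1)   # IDF-based term gating
    return (gate * term_scores).sum(dim=-1)   # [batch] document relevance score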