def __init__(self, embeddings, config):
    super(KNRM_class, self).__init__()
    pad_token = config["pad_token"]
    self.p = config

    # Gaussian kernel parameters for kernel pooling; the narrow mu=1.0 / sigma=0.001
    # kernel isolates (near-)exact matches.
    mus = [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0]
    sigmas = [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.001]
    self.kernels = RbfKernelBank(mus, sigmas, dim=1, requires_grad=config["gradkernels"])

    self.embedding = create_emb_layer(embeddings, non_trainable=True)
    self.simmat = SimilarityMatrix(padding=pad_token)

    channels = 1
    if config["singlefc"]:
        combine_steps = [nn.Linear(self.kernels.count() * channels, 1)]
    else:
        combine_steps = [
            nn.Linear(self.kernels.count() * channels, 30),
            nn.Tanh(),
            nn.Linear(30, 1),
        ]
    if config["scoretanh"]:
        combine_steps.append(nn.Tanh())
    self.combine = nn.Sequential(*combine_steps)
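# Illustrative sketch (not part of the model code): each (mu, sigma) pair above is assumed
# to define a Gaussian kernel applied element-wise to the query-document cosine similarity
# matrix, as in standard KNRM kernel pooling; RbfKernelBank's actual implementation may differ.
import torch

def gaussian_kernel_features(simmat, mus, sigmas):
    """simmat: (batch, qlen, dlen) similarities -> (batch, n_kernels, qlen, dlen) activations."""
    mus = torch.tensor(mus).view(1, -1, 1, 1)
    sigmas = torch.tensor(sigmas).view(1, -1, 1, 1)
    return torch.exp(-((simmat.unsqueeze(1) - mus) ** 2) / (2 * sigmas ** 2))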
def __init__(self, weights_matrix, config):
    super(PACRR_class, self).__init__()
    p = config
    self.p = p
    self.embedding_dim = weights_matrix.shape[1]
    self.embedding = create_emb_layer(weights_matrix, non_trainable=True)
    self.simmat = SimilarityMatrix(padding=config["pad_token"])

    # One convolutional max-pooling module per n-gram size.
    self.ngrams = nn.ModuleList()
    for ng in range(p["mingram"], p["maxgram"] + 1):
        self.ngrams.append(PACRRConvMax2dModule(ng, p["nfilters"], k=p["kmax"], channels=1))

    qterm_size = len(self.ngrams) * p["kmax"] + (1 if p["idf"] else 0)
    self.linear1 = torch.nn.Linear(p["maxqlen"] * qterm_size, p["combine"])
    self.linear2 = torch.nn.Linear(p["combine"], p["combine"])
    self.linear3 = torch.nn.Linear(p["combine"], 1)

    if p["nonlinearity"] == "none":
        nonlinearity = torch.nn.Identity
    elif p["nonlinearity"] == "relu":
        nonlinearity = torch.nn.ReLU
    elif p["nonlinearity"] == "tanh":
        nonlinearity = torch.nn.Tanh
    else:
        raise ValueError("Unexpected nonlinearity: should be none, relu, or tanh")

    self.combine = torch.nn.Sequential(
        self.linear1, nonlinearity(), self.linear2, nonlinearity(), self.linear3
    )
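# Hypothetical usage sketch: a config covering every key the PACRR constructor reads.
# The key names come from the constructor above; the concrete values and the random
# weights matrix are illustrative only (any (vocab_size, emb_dim) array accepted by
# create_emb_layer would do).
import numpy as np

pacrr_config = {
    "pad_token": 0, "mingram": 1, "maxgram": 3, "nfilters": 32, "kmax": 2,
    "idf": True, "maxqlen": 4, "combine": 32, "nonlinearity": "relu",
}
dummy_weights = np.random.rand(1000, 300).astype(np.float32)  # (vocab_size, emb_dim)
# model = PACRR_class(dummy_weights, pacrr_config)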
def __init__(self, embeddings, config):
    super(DRMM_class, self).__init__()
    self.p = config
    self.nbins = self.p["nbins"]
    self.nodes = self.p["nodes"]
    self.hist_type = self.p["histType"]
    self.gate_type = self.p["gateType"]
    self.embedding = create_emb_layer(embeddings, non_trainable=True)

    # Per-term feed-forward network over the matching-histogram features.
    self.ffw = nn.Sequential(
        nn.Linear(self.nbins + 1, self.nodes), nn.Tanh(),
        nn.Linear(self.nodes, 1), nn.Tanh()
    )

    emb_dim = self.embedding.weight.size(-1)
    if self.gate_type == "IDF":
        self.gates = nn.Linear(1, 1, bias=False)        # gate on the IDF scalar
    elif self.gate_type == "TV":
        self.gates = nn.Linear(emb_dim, 1, bias=False)  # gate on the term vector
    else:
        raise ValueError("Invalid value for gateType: gateType should be either IDF or TV")
    self.output_layer = nn.Linear(1, 1)

    # Initialize FC and gate weights in the same way as MatchZoo.
    nn.init.uniform_(self.ffw[0].weight, -0.1, 0.1)
    nn.init.uniform_(self.ffw[2].weight, -0.1, 0.1)
    nn.init.uniform_(self.gates.weight, -0.01, 0.01)
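# Illustrative sketch (assumption, not the actual forward pass): DRMM-style term gating.
# self.gates is assumed to produce one logit per query term (from its IDF scalar or its
# term vector), which is softmax-normalized and used to weight the per-term ffw scores.
import torch
import torch.nn.functional as F

def gated_aggregate(per_term_scores, gate_logits, query_mask):
    """All inputs are (batch, qlen); returns (batch,) relevance scores."""
    gate_logits = gate_logits.masked_fill(query_mask == 0, float("-inf"))
    gates = F.softmax(gate_logits, dim=-1)        # attention weights over query terms
    return (gates * per_term_scores).sum(dim=-1)  # weighted sum of per-term scores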
def __init__(self, extractor, p):
    super(HiNT, self).__init__()
    self.p = p
    self.passagelen = int(p["trainer"]["maxdoclen"] / 100)  # 100: window size
    Ws_dim = 1  # fixed to 1, since GRUModel is created assuming an input size of (2 * Ws_dim + 1)
    self.batch_size, self.lstm_hidden_dim = p["trainer"]["batch"], self.p["LSTMdim"]
    self.embedding = create_emb_layer(extractor.embeddings, non_trainable=True)
    embedding_dim = self.embedding.weight.shape[1]
    self.Ws = nn.Linear(embedding_dim, Ws_dim)
    self.GRU2d1 = GRUModel2d(3, self.p["spatialGRU"])
    self.GRU2d3 = GRUModel2d(3, self.p["spatialGRU"])
    self.lstm = nn.LSTM(input_size=4 * self.p["spatialGRU"],
                        hidden_size=self.lstm_hidden_dim,
                        bidirectional=True)
    self.Wv = nn.Linear(4 * self.p["spatialGRU"], self.lstm_hidden_dim, bias=True)
    self.fc = nn.Linear(self.lstm_hidden_dim * self.p["kmax"], 1)
def __init__(self, extractor, config):
    super(ConvKNRM_class, self).__init__()
    self.p = config
    self.simmat = SimilarityMatrix(padding=extractor.pad)
    self.embeddings = create_emb_layer(extractor.embeddings, non_trainable=True)

    # Gaussian kernel parameters for kernel pooling.
    mus = [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0]
    sigmas = [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.001]
    self.kernels = RbfKernelBank(mus, sigmas, dim=1, requires_grad=config["gradkernels"])

    # One 1-D convolution per n-gram size; sizes > 1 are right-padded so the output
    # length matches the input length.
    self.padding, self.convs = nn.ModuleList(), nn.ModuleList()
    for conv_size in range(1, config["maxngram"] + 1):
        if conv_size > 1:
            self.padding.append(nn.ConstantPad1d((0, conv_size - 1), 0))
        else:
            self.padding.append(nn.Sequential())  # identity
        self.convs.append(nn.ModuleList())
        for _ in range(1):
            self.convs[-1].append(nn.Conv1d(self.embeddings.weight.shape[1], config["filters"], conv_size))

    # With crossmatch, every query n-gram size is matched against every document n-gram size.
    channels = config["maxngram"] ** 2 if config["crossmatch"] else config["maxngram"]
    channels *= 1 ** 2 if config["crossmatch"] else 1  # no-op with a single embedding channel

    if config["singlefc"]:
        combine_steps = [nn.Linear(self.kernels.count() * channels, 1)]
    else:
        combine_steps = [nn.Linear(self.kernels.count() * channels, 30), nn.Tanh(), nn.Linear(30, 1)]
    if config["scoretanh"]:
        combine_steps.append(nn.Tanh())
    self.combine = nn.Sequential(*combine_steps)
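# Worked example of the channel arithmetic above (illustration only): with maxngram = 3,
# crossmatch yields 3 * 3 = 9 similarity channels, while without it only equal n-gram
# sizes are compared (3 channels), so with 11 kernels the single-FC head receives
# 11 * 9 = 99 versus 11 * 3 = 33 kernel-pooled features.
maxngram, n_kernels = 3, 11
channels_cross, channels_nocross = maxngram ** 2, maxngram
print(n_kernels * channels_cross, n_kernels * channels_nocross)  # 99 33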
def __init__(self, weights_matrix, p):
    super(CDSSM_class, self).__init__()
    self.p = p
    self.embedding = create_emb_layer(weights_matrix, non_trainable=True)
    self.conv = nn.Sequential(
        nn.Conv1d(1, p["nfilter"], p["nkernel"]),
        nn.ReLU(),
        nn.Dropout(p["dropoutrate"]),
    )
    self.ffw = nn.Linear(p["nkernel"], p["nhiddens"])
    self.output_layer = nn.Sequential(nn.Sigmoid())
def __init__(self, extractor, config):
    super(CDSSM_class, self).__init__()
    self.p = config
    self.embedding = create_emb_layer(extractor.embeddings, non_trainable=True)
    self.conv = nn.Sequential(
        nn.Conv1d(1, config["nfilter"], config["nkernel"]),
        nn.ReLU(),
        nn.Dropout(config["dropoutrate"]),
    )
    self.ffw = nn.Linear(config["nkernel"], config["nhiddens"])
    self.output_layer = nn.Sequential(nn.Sigmoid())
def __init__(self, extractor, config):
    super(DRMMTKS_class, self).__init__()
    self.topk = config["topk"]
    self.gate_type = config["gateType"]
    self.embedding = create_emb_layer(extractor.embeddings, non_trainable=config["freezeemb"])
    self.simmat = SimilarityMatrix(self.embedding)
    self.ffw = nn.Sequential(nn.Linear(self.topk, 1), nn.Tanh())

    # Gate input is the IDF scalar or the full term embedding, depending on gateType.
    gate_inp_dim = 1 if self.gate_type == "IDF" else self.embedding.weight.size(-1)
    self.gates = nn.Linear(gate_inp_dim, 1, bias=False)
    self.output_layer = nn.Linear(1, 1)

    # Initialize FC and gate weights in the same way as MatchZoo.
    nn.init.uniform_(self.ffw[0].weight, -0.1, 0.1)
    nn.init.uniform_(self.gates.weight, -0.01, 0.01)
def __init__(self, weights_matrix, p):
    super(DistributedModel, self).__init__()
    if p["activation"] == "tanh":
        self.activation = nn.Tanh()
    elif p["activation"] == "relu":
        self.activation = nn.ReLU()
    else:
        raise ValueError("Unexpected activation: should be either tanh or relu")

    self.emb = create_emb_layer(weights_matrix, non_trainable=True)
    embsize = weights_matrix.shape[-1]
    print("weights_matrix embsize: ", embsize)

    self.q_conv = nn.Sequential(
        nn.Conv2d(1, p["nfilters"], (3, embsize)),
        self.activation,
        nn.Dropout(p["dropoutrate"]),
        nn.MaxPool2d((2, 1), stride=(1, 1)),
    )
    self.q_ffw = nn.Sequential(nn.Linear(p["nfilters"], p["nfilters"]))

    # (B, 1, Q, V) -> (B, H, Q', 1)
    self.d_conv1 = nn.Sequential(
        nn.Conv2d(1, p["nfilters"], (3, embsize)),
        self.activation,
        nn.Dropout(p["dropoutrate"]),
        nn.MaxPool2d((100, 1), stride=(1, 1)),
    )
    self.d_conv2 = nn.Sequential(
        nn.Conv2d(1, p["nfilters"], (p["nfilters"], 1)),  # (B, 1, H, Q') -> (B, H, 1, Q')
        self.activation,
        nn.Dropout(p["dropoutrate"]),
    )
    self.ffw_1 = nn.Sequential(nn.Linear(p["nhidden"], 1), self.activation)
    self.ffw_2 = nn.Sequential(nn.Dropout(p["dropoutrate"]), nn.Linear(p["nfilters"], 1))
def __init__(self, extractor, config):
    super(TK_class, self).__init__()
    self.embeddim = extractor.embeddings.shape[1]
    self.p = config

    # Gaussian kernel centers for kernel pooling; sigma is shared across kernels.
    self.mus = torch.tensor([-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0], dtype=torch.float)
    self.mu_matrix = self.get_mu_matrix(extractor)
    self.sigma = torch.tensor(0.1, requires_grad=False)
    dropout = 0

    non_trainable = not self.p["finetune"]
    self.embedding = create_emb_layer(extractor.embeddings, non_trainable=non_trainable)
    self.cosine_module = StackedSimilarityMatrix(padding=extractor.pad)
    self.position_encoder = PositionalEncoding(self.embeddim)
    self.mixer = nn.Parameter(torch.full([1, 1, 1], 0.9, dtype=torch.float32, requires_grad=True))
    encoder_layers = TransformerEncoderLayer(self.embeddim, config["numattheads"], config["ffdim"], dropout)
    self.transformer_encoder = TransformerEncoder(encoder_layers, config["numlayers"])

    self.s_log_fcc = nn.Linear(len(self.mus), 1, bias=False)
    self.s_len_fcc = nn.Linear(len(self.mus), 1, bias=False)
    self.comb_fcc = nn.Linear(2, 1, bias=False)

    # Initialize with small weights (inits taken from MatchZoo); otherwise the dense output
    # is far too large for the tanh, leaving the loss stuck at 1.
    torch.nn.init.uniform_(self.s_log_fcc.weight, -0.014, 0.014)
    torch.nn.init.uniform_(self.s_len_fcc.weight, -0.014, 0.014)
    torch.nn.init.uniform_(self.comb_fcc.weight, -0.014, 0.014)
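# Illustrative sketch (assumption, not the actual forward pass of TK_class): the mixer
# parameter above is typically used to blend the raw term embeddings with their
# transformer-contextualized counterparts before the similarity matrix is computed.
def contextualize(emb, position_encoder, transformer_encoder, mixer):
    """emb: (seq_len, batch, embeddim) -> blended term representations of the same shape."""
    ctx = transformer_encoder(position_encoder(emb))
    return mixer * emb + (1 - mixer) * ctx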
def __init__(self, weights_matrix, pipeline_config):
    super(POSITDRMM_basic, self).__init__()
    self.embedding_dim = weights_matrix.shape[1]
    self.lstm_hidden_dim = weights_matrix.shape[1]
    self.batch_size = pipeline_config["batch"]
    self.QUERY_LENGTH = pipeline_config["maxqlen"]
    self.lstm_num_layers = 2

    # Bidirectional LSTM encoder over the (frozen) term embeddings.
    self.encoding_layer = nn.LSTM(
        input_size=self.embedding_dim,
        hidden_size=self.embedding_dim,
        num_layers=self.lstm_num_layers,
        bidirectional=True,
        dropout=0.3,
    )
    self.pad_token = pipeline_config["pad_token"]
    self.embedding = create_emb_layer(weights_matrix, non_trainable=True)
    self.m = nn.Dropout(p=0.2)
    self.Q1 = nn.Linear(6, 1, bias=True)
    self.Wg = nn.Linear(5, 1)
    self.activation = nn.LeakyReLU()
    self.hidden = self.init_hidden()
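# init_hidden() is referenced above but not defined in this section. A typical
# implementation for this 2-layer bidirectional LSTM (hedged sketch, not the project's
# actual method) would allocate zero states of shape (num_layers * num_directions,
# batch, hidden_size):
def init_hidden(self):
    num_directions = 2  # bidirectional LSTM
    shape = (self.lstm_num_layers * num_directions, self.batch_size, self.embedding_dim)
    return (torch.zeros(shape), torch.zeros(shape))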