def __init__(self, word_vec_size, hidden_size, word_mat, dropout_prob=0.1,
             num_layers=3, no_wordvec_layer=False):
    super(BOWModel, self).__init__()
    # Pretrained word embeddings (ibp wrappers support interval bound propagation).
    self.embs = ibp.Embedding.from_pretrained(word_mat)
    # Linear projection from word-vector space into the hidden space.
    self.rotation = ibp.Linear(word_vec_size, hidden_size)
    # Dropout on the summed bag-of-words representation (disabled if dropout_prob is 0).
    self.sum_drop = ibp.Dropout(dropout_prob) if dropout_prob else None
    # MLP classifier over 2 * hidden_size features (premise and hypothesis encodings concatenated).
    layers = []
    for i in range(num_layers):
        layers.append(ibp.Linear(2 * hidden_size, 2 * hidden_size))
        layers.append(ibp.Activation(F.relu))
        if dropout_prob:
            layers.append(ibp.Dropout(dropout_prob))
    # Final projection to the entailment labels, followed by log-softmax.
    layers.append(ibp.Linear(2 * hidden_size, len(EntailmentLabels)))
    layers.append(ibp.LogSoftmax(dim=1))
    self.layers = nn.Sequential(*layers)
def __init__(self, word_vec_size, hidden_size, word_mat, dropout_prob=0.1,
             num_layers=2, no_wordvec_layer=False):
    super(DecompAttentionModel, self).__init__()
    # Pretrained word embeddings (ibp wrappers support interval bound propagation).
    self.embs = ibp.Embedding.from_pretrained(word_mat)
    # Learned "null" token embedding.
    self.null = nn.Parameter(torch.normal(mean=torch.zeros(word_vec_size)))
    self.rotation = None
    hidden_size = word_vec_size
    self.rotation = ibp.Linear(word_vec_size, hidden_size)

    def get_feedforward_layers(num_layers, input_size, hidden_size, output_size):
        # Build a small MLP: dropout before each linear layer, ReLU between hidden layers.
        layers = []
        for i in range(num_layers):
            layer_in_size = input_size if i == 0 else hidden_size
            layer_out_size = output_size if i == num_layers - 1 else hidden_size
            if dropout_prob:
                layers.append(ibp.Dropout(dropout_prob))
            layers.append(ibp.Linear(layer_in_size, layer_out_size))
            if i < num_layers - 1:
                layers.append(ibp.Activation(F.relu))
        return layers

    # "Attend" network: produces a scalar score per token representation.
    ff_layers = get_feedforward_layers(num_layers, hidden_size, hidden_size, 1)
    self.feedforward = nn.Sequential(*ff_layers)
    # "Compare" network over concatenated vector pairs (2 * hidden_size -> hidden_size).
    compare_layers = get_feedforward_layers(num_layers, 2 * hidden_size, hidden_size, hidden_size)
    self.compare_ff = nn.Sequential(*compare_layers)
    # "Aggregate" network over the concatenated sentence summaries,
    # ending in a classifier over the entailment labels.
    output_layers = get_feedforward_layers(num_layers, 2 * hidden_size, hidden_size, hidden_size)
    output_layers.append(ibp.Linear(hidden_size, len(EntailmentLabels)))
    output_layers.append(ibp.LogSoftmax(dim=1))
    self.output_layer = nn.Sequential(*output_layers)
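# Hypothetical usage sketch (not part of the original file): constructing both models
# from an embedding matrix. The vocabulary size (10000), word-vector dimension (300),
# and hidden size (100) below are illustrative assumptions, and the random `word_mat`
# is a stand-in for pretrained word vectors loaded elsewhere.
if __name__ == '__main__':
    word_vec_size, hidden_size = 300, 100
    word_mat = torch.randn(10000, word_vec_size)  # stand-in for pretrained embeddings
    bow_model = BOWModel(word_vec_size, hidden_size, word_mat)
    decomp_model = DecompAttentionModel(word_vec_size, hidden_size, word_mat)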