def __init__(self, words, args):
    super(Model, self).__init__()
    self.args = args
    self.n_d = args.d
    self.depth = args.depth
    self.drop = nn.Dropout(args.dropout)
    self.embedding_layer = EmbeddingLayer(self.n_d, words)
    self.n_V = self.embedding_layer.n_V
    if args.lstm:
        self.rnn = nn.LSTM(self.n_d, self.n_d, self.depth,
            dropout=args.rnn_dropout
        )
    else:
        self.rnn = MF.SRU(self.n_d, self.n_d, self.depth,
            dropout=args.rnn_dropout,
            rnn_dropout=args.rnn_dropout,
            use_tanh=0
        )
    self.output_layer = nn.Linear(self.n_d, self.n_V)
    # tie weights
    self.output_layer.weight = self.embedding_layer.embedding.weight
    self.init_weights()
    if not args.lstm:
        self.rnn.set_bias(args.bias)
def __init__(self, args, emb_layer, nclasses=2):
    super(Model, self).__init__()
    self.args = args
    self.drop = nn.Dropout(args.dropout)
    self.emb_layer = emb_layer
    if args.cnn:
        self.encoder = modules.CNN_Text(
            emb_layer.n_d,
            widths=[3, 4, 5]
        )
        d_out = 300
    elif args.lstm:
        self.encoder = nn.LSTM(
            emb_layer.n_d,
            args.d,
            args.depth,
            dropout=args.dropout,
        )
        d_out = args.d
    else:
        self.encoder = MF.SRU(
            emb_layer.n_d,
            args.d,
            args.depth,
            dropout=args.dropout,
            use_tanh=1,
        )
        d_out = args.d
    self.out = nn.Linear(d_out, nclasses)
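# A minimal forward-pass sketch for the classifier above -- an assumption, not
# code from the original source. It assumes `input` is a LongTensor of word ids
# shaped (length, batch), that `emb_layer` is callable, and that the recurrent
# encoders return (output, hidden) with output shaped (length, batch, d_out).
# The CNN branch is assumed to pool over time internally and return (batch, d_out).
def forward(self, input):
    emb = self.drop(self.emb_layer(input))
    if self.args.cnn:
        output = self.encoder(emb)           # assumed: (batch, d_out)
    else:
        output, hidden = self.encoder(emb)   # (length, batch, d_out)
        output = output[-1]                  # last time step: (batch, d_out)
    return self.out(self.drop(output))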
def __init__(self, args, emb_layer, nclasses=2, feature_dropout=False):
    super(Classifier, self).__init__()
    self.args = args
    self.drop = nn.Dropout(args.dropout)
    self.emb_layer = emb_layer
    self.feature_dropout = feature_dropout
    self.deep_shallow = args.deep_shallow
    self.state_size = args.state_size
    # self.layer_repr = args.layer_lr
    if args.lstm:
        self.encoder = nn.LSTM(
            emb_layer.n_d,
            args.state_size,
            args.layers,
            dropout=args.dropout,
        )
    else:
        self.encoder = MF.SRU(
            emb_layer.n_d,
            args.state_size,
            args.layers,
            dropout=args.dropout,
            use_tanh=1,
            bidirectional=True
        )
    self.out_proj = nn.Linear(args.state_size * 5 * 2, nclasses)
    self.init_weights()
    if not args.lstm:
        self.encoder.set_bias(args.bias)
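# Why out_proj takes state_size * 5 * 2 inputs: the sentence-pair head below
# concatenates 5 feature vectors (a, b, a - b, a * b, (a + b) / 2.), each the
# pooled output of a bidirectional encoder and hence state_size * 2 wide.
# A small stand-alone sanity check of that arithmetic (hypothetical helper name
# and sizes, not taken from the original source):
def _check_out_proj_width(state_size=128, nclasses=2, batch=4):
    import torch
    import torch.nn as nn
    a = torch.randn(batch, state_size * 2)   # pooled sentence A
    b = torch.randn(batch, state_size * 2)   # pooled sentence B
    features = torch.cat((a, b, a - b, a * b, (a + b) / 2.), 1)
    out_proj = nn.Linear(state_size * 5 * 2, nclasses)
    assert out_proj(features).size() == (batch, nclasses)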
def __init__(self, words, args):
    super(Model, self).__init__()
    self.args = args
    self.n_d = args.d
    self.depth = args.depth
    self.drop = nn.Dropout(args.dropout)
    self.embedding_layer = EmbeddingLayer(self.n_d, words)
    self.n_V = self.embedding_layer.n_V
    if args.omer:
        self.architecture = Architecture(args.content, args.gates)
        self.rnn = Contextualizer([self.n_d] * (self.depth + 1),
            args.rnn_dropout, args.rnn_dropout,
            False, self.architecture, False
        )
    elif args.lstm:
        self.rnn = nn.LSTM(self.n_d, self.n_d, self.depth,
            dropout=args.rnn_dropout
        )
    else:
        self.rnn = MF.SRU(self.n_d, self.n_d, self.depth,
            dropout=args.rnn_dropout,
            rnn_dropout=args.rnn_dropout,
            use_tanh=0
        )
    self.output_layer = nn.Linear(self.n_d, self.n_V)
    # tie weights
    self.output_layer.weight = self.embedding_layer.embedding.weight
    self.init_weights()
    if (not args.omer) and (not args.lstm):
        self.rnn.set_bias(args.bias)
    # 2. temporal max pooling over the time dimension (fragment from the
    # sentence-pair classifier's forward pass)
    a = torch.max(outputA, 0)[0].squeeze(0)
    b = torch.max(outputB, 0)[0].squeeze(0)
    features = torch.cat((a, b, a - b, a * b, (a + b) / 2.), 1)
    # (a + b) / 2.  # took out subtraction since it's not in InferSent
    return self.out_proj(features)


if __name__ == '__main__':
    # test SRU
    encoder = MF.SRU(input_size=5, hidden_size=5, num_layers=2,
                     dropout=0.5, use_tanh=1, bidirectional=True)
    from torch.autograd import Variable
    x = Variable(torch.randn([3, 10, 5]))
    output, hidden = encoder(x)
    # output is (length, batch size, hidden size * number of directions)
    # hidden is (layers, batch size, hidden size * number of directions)
    import IPython
    IPython.embed()
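    # A hedged sanity check, following the shape comments above (an assumption,
    # not part of the original test): with 2 layers, 2 directions, and hidden
    # size 5, output should be (3, 10, 10) and hidden (2, 10, 10).
    assert output.size() == (3, 10, 10)
    assert hidden.size() == (2, 10, 10)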