def __init__(self, n_heads, d_models, dk, dv, dropout=0.1):
    """Build the projection, attention, normalisation and output layers.

    Args:
        n_heads: Number of attention heads.
        d_models: Feature width of the layer's input and output.
        dk: Per-head width of the query and key projections.
        dv: Per-head width of the value projection.
        dropout: Probability handed to ``nn.Dropout``.
    """
    super().__init__()

    self.n_heads = n_heads
    self.d_models = d_models
    self.dk = dk
    self.dv = dv

    # Learned linear maps that project the inputs into all heads at once.
    self.w_qs = nn.Linear(d_models, n_heads * dk)
    self.w_ks = nn.Linear(d_models, n_heads * dk)
    # Values are dv wide per head (the original sized this with dk, which
    # only worked when dk == dv).
    # NOTE(review): confirm forward() reshapes the values with dv.
    self.w_vs = nn.Linear(d_models, n_heads * dv)

    # Same initialisation order as before (q, k, v) so seeded runs match.
    for projection, head_width in ((self.w_qs, dk),
                                   (self.w_ks, dk),
                                   (self.w_vs, dv)):
        nn.init.normal_(projection.weight,
                        mean=0,
                        std=np.sqrt(2.0 / (d_models + head_width)))

    self.attention = ScaledDotProductAttention()
    self.layerNorm = nn.LayerNorm(d_models)

    # Maps the concatenated per-head values back to the model width.
    self.fc = nn.Linear(n_heads * dv, d_models)
    nn.init.xavier_normal_(self.fc.weight)

    self.dropout = nn.Dropout(dropout)
Beispiel #2
0
 def __init__(self, input_nodes, hidden_layers, output_nodes):
     """Create a chain of fully connected layers named ``fc1`` .. ``fcN``.

     ``fc1`` maps the input to the first hidden width, each following layer
     maps one hidden width to the next, and the last layer maps the final
     hidden width to ``output_nodes``.

     Args:
         input_nodes: Number of input features.
         hidden_layers: Non-empty sequence of hidden layer widths.
         output_nodes: Number of output features.
     """
     super(Net, self).__init__()
     self.fc1 = nn.Linear(input_nodes, hidden_layers[0])

     # setattr registers each layer under its own name; the original bound
     # every layer to the single attribute ``self.name``.
     # NOTE(review): the fcN naming must match whatever forward() looks up.
     widths = zip(hidden_layers, hidden_layers[1:])
     for position, (fan_in, fan_out) in enumerate(widths, start=2):
         setattr(self, "fc" + str(position), nn.Linear(fan_in, fan_out))

     # Output layer, always created after the hidden layers.
     setattr(self,
             "fc" + str(len(hidden_layers) + 1),
             nn.Linear(hidden_layers[-1], output_nodes))
Beispiel #3
0
 def __init__(self, in_planes, ratio=reduction_ratio):
     """Create the pooling layer, the two linear layers and their activations.

     Args:
         in_planes: Number of input channels; also the width of the output.
         ratio: Divisor applied to ``in_planes`` to get the bottleneck width.
     """
     super(SEBlock, self).__init__()
     self.in_planes = in_planes
     self.ratio = ratio
     bottleneck = in_planes // self.ratio

     self.GAP = nn.AdaptiveAvgPool2d(1)
     self.fc1 = nn.Linear(in_planes, bottleneck)
     self.relu1 = nn.ReLU(inplace=True)
     self.fc2 = nn.Linear(bottleneck, in_planes)
     self.Sigmoid = nn.Sigmoid()
    def __init__(self):
        """Create the four fully connected layers of the network."""
        # Initialise nn.Module state before any sub-module is registered.
        super(deep_q_network, self).__init__()

        # input data is: state
        # output data is: next_state
        # NOTE(review): the widths do not chain (fc1 emits 2 features while
        # fc2, fc3 and fc4 each expect 3). Confirm against forward() whether
        # extra features are concatenated or the sizes are wrong.
        self.fc1 = nn.Linear(in_features=1, out_features=2)
        self.fc2 = nn.Linear(in_features=3, out_features=4)
        self.fc3 = nn.Linear(in_features=3, out_features=2)
        self.fc4 = nn.Linear(in_features=3, out_features=1)
Beispiel #5
0
    def __init__(self, embed_size, hidden_size, vocab, dropout_rate=0.2):
        """ Init NMT Model.

        @param embed_size (int): Embedding size (dimensionality)
        @param hidden_size (int): Hidden Size (dimensionality)
        @param vocab (Vocab): Vocabulary object containing src and tgt languages
                              See vocab.py for documentation.
        @param dropout_rate (float): Dropout probability, for attention
        """
        super(NMT, self).__init__()
        self.model_embeddings = ModelEmbeddings(embed_size, vocab)
        self.hidden_size = hidden_size
        self.dropout_rate = dropout_rate
        self.vocab = vocab

        # default values
        self.encoder = None
        self.decoder = None
        self.h_projection = None
        self.c_projection = None
        self.att_projection = None
        self.combined_output_projection = None
        self.target_vocab_projection = None
        self.dropout = None


        ### YOUR CODE HERE (~8 Lines)
        ### TODO - Initialize the following variables:
        ###     self.encoder (Bidirectional LSTM with bias)
        ###     self.decoder (LSTM Cell with bias)
        ###     self.h_projection (Linear Layer with no bias), called W_{h} in the PDF.
        ###     self.c_projection (Linear Layer with no bias), called W_{c} in the PDF.
        ###     self.att_projection (Linear Layer with no bias), called W_{attProj} in the PDF.
        ###     self.combined_output_projection (Linear Layer with no bias), called W_{u} in the PDF.
        ###     self.target_vocab_projection (Linear Layer with no bias), called W_{vocab} in the PDF.
        ###     self.dropout (Dropout Layer)
        ###
        ### Use the following docs to properly initialize these variables:
        ###     LSTM:
        ###         https://pytorch.org/docs/stable/nn.html#torch.nn.LSTM
        ###     LSTM Cell:
        ###         https://pytorch.org/docs/stable/nn.html#torch.nn.LSTMCell
        ###     Linear Layer:
        ###         https://pytorch.org/docs/stable/nn.html#torch.nn.Linear
        ###     Dropout Layer:
        ###         https://pytorch.org/docs/stable/nn.html#torch.nn.Dropout

        # Single-layer bidirectional encoder. The dropout argument was dropped:
        # nn.LSTM only applies it between stacked layers, so with one layer it
        # did nothing except emit a warning.
        self.encoder = nn.LSTM(input_size=embed_size,
                               hidden_size=self.hidden_size,
                               bias=True,
                               bidirectional=True)

        # Each decoder step consumes a target embedding concatenated with a
        # hidden_size-wide vector, hence embed_size + hidden_size inputs.
        self.decoder = nn.LSTMCell(input_size=embed_size + self.hidden_size,
                                   hidden_size=self.hidden_size,
                                   bias=True)

        # The projections below are bias-free, as the task description requires.
        # The 2 * hidden_size inputs match the bidirectional encoder's width.
        self.h_projection = nn.Linear(2 * self.hidden_size,
                                      self.hidden_size,
                                      bias=False)
        self.c_projection = nn.Linear(2 * self.hidden_size,
                                      self.hidden_size,
                                      bias=False)
        self.att_projection = nn.Linear(2 * self.hidden_size,
                                        self.hidden_size,
                                        bias=False)
        self.combined_output_projection = nn.Linear(3 * self.hidden_size,
                                                    self.hidden_size,
                                                    bias=False)

        # NOTE(review): this assumes model_embeddings.target exposes .shape
        # (tensor-like). If it is an nn.Embedding, use its num_embeddings or
        # the target vocabulary size instead -- confirm against ModelEmbeddings.
        self.target_vocab_projection = nn.Linear(
            self.hidden_size,
            self.model_embeddings.target.shape[0],
            bias=False)

        self.dropout = nn.Dropout(p=self.dropout_rate)
Beispiel #6
0
 def __init__(self, in_size, H_size):
     """
     Build the projection and gate branches of a highway network that is
     applied to the output of the convolution layer (x_conv).

             @param in_size (int): Size of input layer; it's e_{word} (dimensionality)
             @param H_size (int): Size of Hidden layer; it's e_{word} (dimensionality)
     """
     # Must run before sub-modules are assigned when the class is an
     # nn.Module; harmless otherwise.
     super().__init__()

     # Activations are modules inside a Sequential, so calling self.proj(x)
     # yields relu(linear(x)) and self.gate(x) yields sigmoid(linear(x)).
     # The original passed layer objects straight into F.relu / nn.Sigmoid.
     self.proj = nn.Sequential(nn.Linear(in_size, H_size), nn.ReLU())
     self.gate = nn.Sequential(nn.Linear(in_size, H_size), nn.Sigmoid())
    def __init__(self, d_model, d_ff, dropout=0.1):
        """Create the two linear layers and the dropout layer.

        Args:
            d_model: Number of input and output features.
            d_ff: Number of features between the two linear layers.
            dropout: Probability handed to ``nn.Dropout``.
        """
        super(PositionWiseFeedForward, self).__init__()
        # Expand to d_ff, then project back to d_model.
        self.w_1 = nn.Linear(d_model, d_ff)
        self.w_2 = nn.Linear(d_ff, d_model)
        self.dropout = nn.Dropout(dropout)
Beispiel #8
0
 def __init__(self,input_nc, output_nc, nlayers,ngd,nl_layer=None, use_dropout=False, gpu_ids=[]):
     """Build a stack of linear layers that widens by ``ngd`` per layer.

     Layout: ``input_nc -> ngd -> 2*ngd -> ... -> nlayers*ngd -> output_nc``,
     with an optional non-linearity after every hidden layer and a sigmoid
     after the output layer.

     Args:
         input_nc: Number of input features.
         output_nc: Number of output features.
         nlayers: Number of hidden linear layers (at least 1).
         ngd: Width increment per hidden layer.
         nl_layer: Zero-argument factory returning an activation module, or
             None for no hidden activation.
             NOTE(review): presumably a module class or functools.partial;
             confirm against callers.
         use_dropout: Accepted for interface compatibility; not used here.
         gpu_ids: Device ids, stored on the instance.
     """
     super(G2net, self).__init__()
     # Copy so the shared default list is never mutated through the instance.
     self.gpu_ids = list(gpu_ids)

     def hidden(fan_in, fan_out):
         # One hidden unit: a linear layer plus the optional non-linearity.
         unit = [nn.Linear(fan_in, fan_out)]
         if nl_layer is not None:
             unit.append(nl_layer())
         return unit

     layers = hidden(input_nc, ngd)
     width = ngd
     for n in range(1, nlayers):
         next_width = ngd * (n + 1)
         layers += hidden(width, next_width)
         width = next_width

     # Tracking ``width`` keeps the output layer valid when nlayers == 1,
     # where the loop above never runs. nn.Sigmoid takes no ``inplace``
     # argument, so it is built directly.
     layers += [nn.Linear(width, output_nc), nn.Sigmoid()]
     self.fc = nn.Sequential(*layers)
Beispiel #9
0
    def __init__(self, input_size, num_units_lstm):
        """Create the encoder LSTM, the two latent heads and the decoder LSTM.

        Args:
            input_size: Feature width of the encoder input; also the hidden
                width of the decoder LSTM.
            num_units_lstm: Hidden width of the encoder LSTM.
        """
        super(LSTMNETWORK, self).__init__()

        # take the input
        self.encoderrnn = nn.LSTM(input_size, num_units_lstm)

        # a few linear layers needed for the reparameterize trick
        # NOTE(review): z_size is not a parameter; it must be defined at
        # module level elsewhere in the file.
        self.mu = nn.Linear(num_units_lstm, z_size)
        self.logvar = nn.Linear(num_units_lstm, z_size)

        # take the sampled output and regenerate the image
        self.decoderrnn = nn.LSTM(z_size, input_size)
    def __init__(self):
        """Create the dropout, pooling, convolution and linear layers."""
        super(SensorToDryspotBoolModel, self).__init__()
        self.dropout = nn.Dropout(0.1)
        self.maxpool = nn.MaxPool2d(2, 2)

        # Four convolutions: 1 -> 32 -> 64 -> 128 -> 256 channels.
        self.conv1 = nn.Conv2d(1, 32, (7, 7))
        self.conv2 = nn.Conv2d(32, 64, (5, 5))
        self.conv3 = nn.Conv2d(64, 128, (3, 3))
        self.conv4 = nn.Conv2d(128, 256, (3, 3))

        # Linear layers: 256 -> 1024 -> 512 -> 128 -> 1 features.
        self.fc1 = nn.Linear(256, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, 128)
        self.fc_f = nn.Linear(128, 1)
Beispiel #11
0
    def __init__(self,
                 in_channels,
                 out_channels,
                 reducer='mean',
                 normalize_embedding=True):
        """Create the two linear layers and record the normalisation flag.

        Args:
            in_channels: Width of the input features.
            out_channels: Width of the output features.
            reducer: Accepted but not used here; the base class is always
                initialised with ``aggr='mean'``.
            normalize_embedding: Stored (as a bool) in ``self.normalize_emb``.
        """
        super(GraphSage, self).__init__(aggr='mean')
        # NOTE(review): lin maps (in + out) -> in and agg_lin maps in -> out;
        # confirm these sizes against how forward()/update() use them.
        self.lin = nn.Linear(in_channels + out_channels,
                             in_channels,
                             bias=False)
        self.agg_lin = nn.Linear(in_channels, out_channels)

        # Always define the flag; the original left it unset when the
        # argument was falsy.
        self.normalize_emb = bool(normalize_embedding)
    def __init__(self, N_r, appear_dim=128):
        """Create the value, key, query and geometry projection layers.

        Args:
            N_r: Accepted for interface compatibility.
                NOTE(review): the value is ignored; ``self.N_r`` is fixed at
                16 below. Confirm whether that is intentional.
            appear_dim: Width used to derive ``single_dim``.
        """
        super(Relation, self).__init__()
        self.appear_dim = appear_dim
        self.d_k = 64
        self.d_g = 64
        self.N_r = 16
        # 128 // 16 = 8 with the defaults. Floor division keeps this an int,
        # which nn.Linear needs for a layer width ("/" produced a float).
        self.single_dim = self.appear_dim // self.N_r

        # encoder
        # NOTE(review): the input width is hard-coded to 128 rather than
        # appear_dim; kept as in the original.
        self.W_V = nn.Linear(128, self.single_dim)
        self.W_K = nn.Linear(128, self.d_k)
        self.W_Q = nn.Linear(128, self.d_k)
        self.W_G = nn.Linear(self.d_g, 1)
Beispiel #13
0
    def __init__(self, lrate, in_size, out_size, momentum):
        """Create the layers, the optimizer and the loss function.

        Args:
            lrate: Learning rate for the SGD optimizer.
            in_size: Accepted but not used by any layer built here.
            out_size: Number of outputs of the final linear layer.
            momentum: Momentum for the SGD optimizer.
        """
        super(convNet, self).__init__()

        self.conv1 = nn.Conv2d(1, 10, 5)
        self.reLu = nn.ReLU()
        self.pool = nn.MaxPool2d(2, 2)

        # put the pooled features through a hidden layer
        self.hidden1 = nn.Linear(10 * 12 * 12, 300)
        # this layer classifies for us
        self.output = nn.Linear(300, out_size)

        # Built last so self.parameters() already holds every layer above.
        self.optimizer = optim.SGD(self.parameters(), lr=lrate, momentum=momentum)
        self.loss_fn = nn.CrossEntropyLoss()
Beispiel #14
0
class qa2index(nn.Module):
    """Question encoder plus knowledge-base projection (work in progress)."""

    def __init__(self, encoding_size=100, dropout_rate=0.2, num_questions=None,
                 question_size=None, kb_length=None, hidden_size=None):
        """Load the GloVe vectors and create the encoder and KB layers.

        The original signature was not valid Python (an empty default and
        required parameters after defaulted ones). Positional order is kept;
        the later parameters now default to None.

        Args:
            encoding_size: Input width of the question encoder and KB layer.
                Defaults to 100, matching the GloVe dimension loaded below.
            dropout_rate: Accepted but not used by any layer built here.
            num_questions: Accepted but not used by any layer built here.
            question_size: Accepted but not used by any layer built here.
            kb_length: Output width of the knowledge-base layer (required).
            hidden_size: Hidden width of the question encoder.
                NOTE(review): the original read an undefined ``hidden_size``;
                falling back to ``encoding_size`` is an assumption.

        Raises:
            ValueError: If ``kb_length`` is not supplied.
        """
        super(qa2index, self).__init__()
        if kb_length is None:
            raise ValueError("kb_length is required")
        if hidden_size is None:
            hidden_size = encoding_size

        self.glove = torchtext.vocab.GloVe(name='6B', dim=100)
        self.question_encoder = nn.LSTM(encoding_size, hidden_size,
                                        bidirectional=True, bias=True)
        # after autoencoder trained contexts -> we can add a weight to this layer.
        self.kb = nn.Linear(encoding_size, kb_length, bias=False)

        # Multiplicative attention between the question encoding and the KB.
        # TODO: the original created these with ``nn.linear()`` and no sizes,
        # which cannot be constructed. They stay unset until the layer widths
        # are decided.
        self.qa_l1 = None
        self.qa_l2 = None
        self.qa_l3 = None
        self.qa_start = None
        self.qa_end = None
Beispiel #15
0
    def __init__(self, input_size, hidden_size):
        """Create the four gate projections of the cell.

        Args:
            input_size: Stored on the instance; used as each layer's output width.
            hidden_size: Stored on the instance; each layer takes
                ``4 * hidden_size`` input features.
        """
        super(LSTMCell, self).__init__()
        self.hidden_size = hidden_size
        self.input_size = input_size

        # TODO:
        # nn.Linear includes weights and biases
        # https://discuss.pytorch.org/t/custom-lstm-cell-implementation/64566
        # Debug output kept from the original.
        print(hidden_size)
        print(input_size)

        # NOTE(review): torch.nn.Linear takes (in_features, out_features), so
        # these layers map 4 * hidden_size -> input_size. Confirm against
        # forward() that the arguments are not meant to be swapped.
        gate_in = 4 * hidden_size
        self.W_i = nn.Linear(gate_in, input_size)
        self.W_f = nn.Linear(gate_in, input_size)
        self.W_c = nn.Linear(gate_in, input_size)
        self.W_o = nn.Linear(gate_in, input_size)
    def __init__(self, state_dim, hidden_dim, output_dim, seed=0) -> None:
        """Create the actor's linear and batch-norm layers.

        Args:
            state_dim: State width; the first layer takes ``2 * state_dim``
                input features.
            hidden_dim: Sequence of hidden widths; entries 0, 1 and -1 are read.
            output_dim: Width of the final layer's output.
            seed: Value passed to ``torch.manual_seed``.
        """
        super(Actor, self).__init__()
        self.seed = torch.manual_seed(seed)

        self.fc1 = nn.Linear(2 * state_dim, hidden_dim[0])
        self.bn1 = nn.BatchNorm1d(hidden_dim[0])

        self.fc2 = nn.Linear(hidden_dim[0], hidden_dim[1])
        self.bn2 = nn.BatchNorm1d(hidden_dim[1])

        self.fc3 = nn.Linear(hidden_dim[-1], output_dim)

        self.activation = f.leaky_relu

        # The original called register_parameter() with no arguments, which
        # always raises. The sibling Critic ends with reset_parameters().
        # NOTE(review): confirm this class defines reset_parameters().
        self.reset_parameters()
    def __init__(self, total_size, ics, init_weights=True):
        """Set up bookkeeping, the stem and head layers, then grow the body.

        Args:
            total_size: Stored on the instance for later use.
            ics: Sequence whose sum is stored as ``num_ics``.
            init_weights: When true, ``_init_weights`` runs over all modules.
        """
        super(DenseNet, self).__init__()
        self.total_size = total_size
        self.init_weights = init_weights
        self.ics = ics
        self.num_ics = sum(self.ics)
        self.num_class = 10
        self.num_output = 0

        self.train_func = mf.iter_training_0
        self.test_func = mf.sdn_test

        self.input_size = 32
        self.in_channels = 16
        self.cum_in_channels = self.in_channels

        # Stem: 3-channel input -> in_channels feature maps.
        self.init_conv = nn.Sequential(
            nn.Conv2d(3, self.in_channels, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(self.in_channels),
            nn.ReLU(),
        )

        # Head: pool, flatten, then classify into num_class outputs.
        self.end_layers = nn.Sequential(
            nn.AvgPool2d(kernel_size=8),
            af.Flatten(),
            nn.Linear(2560, self.num_class),
        )
        self.grow()

        if self.init_weights:
            self._init_weights(self.modules())
Beispiel #18
0
    def __init__(self, hidden_size, input_size, bias=True):
        """Create the cell's linear layer, weight parameters and bias terms.

        Args:
            hidden_size: Hidden width; bias vectors hold ``4 * hidden_size`` values.
            input_size: Input width.
            bias: When true (default) the bias parameters are created;
                otherwise they are registered as None. The original tested an
                undefined name ``bias``, so this is a new, defaulted parameter.
        """
        super(SpatioTemporal_LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.input_size = input_size

        self.linear = nn.Linear(self.input_size + self.hidden_size, 4 * self.hidden_size)

        # Registered in the original order so state_dict key order is unchanged.
        # NOTE(review): these tensors are created empty; presumably they are
        # sized later -- confirm _reset_parameters() handles that.
        weight_names = ('xg', 'hg', 'xi', 'hi', 'xf', 'hf',
                        'xg_', 'mg', 'xi_', 'mi', 'xf_', 'mf',
                        'xo', 'ho', 'co', 'mo')
        for suffix in weight_names:
            setattr(self, 'weight_' + suffix, Parameter(torch.Tensor()))
        self.weight_1x1 = Parameter(torch.Tensor(1, 1))

        for suffix in ('g', 'i', 'f', 'g_', 'i_', 'f_', 'o'):
            if bias:
                setattr(self, 'bias_' + suffix,
                        Parameter(torch.Tensor(4 * hidden_size)))
            else:
                # Keep the attribute defined so later lookups do not fail.
                self.register_parameter('bias_' + suffix, None)

        self._reset_parameters()
Beispiel #19
0
 def __init__(self, ntoken, nwe, nz, cond_fusion, nhid, nout, nlayers, cell, dropouti, dropoutl, dropoutw, dropouto):
     """Create the RNN, the optional condition/down projections and the decoder.

     Args:
         ntoken: Output width of the decoder layer.
         nwe: Word-embedding width.
         nz: Width of the conditioning vector.
         cond_fusion: One of ``'cat'``, ``'h0'`` or ``'w0'``.
         nhid: Hidden width of the RNN.
         nout: Output width of the RNN (see the note on the down projection).
         nlayers: Number of RNN layers.
         cell: Cell type, passed through to ``RNN``.
         dropouti, dropoutl, dropoutw, dropouto: Dropout settings passed
             through to ``RNN``; ``dropoutl`` must be 0 for a single layer.
     """
     super().__init__()
     assert cond_fusion in ('cat', 'h0', 'w0')
     assert dropoutl == 0 or nlayers > 1
     # attributes
     self.do_downproj = nlayers == 1 and nout != nhid
     self.nhid = nhid
     self.nlayers = nlayers
     self.cond_fusion = cond_fusion
     self.cell = cell
     self.nwe = nwe
     self.nz = nz
     # modules
     # -- rnn
     # With 'cat' fusion the condition is part of the RNN input width.
     ninp = nwe + nz if cond_fusion == 'cat' else nwe
     # NOTE(review): when do_downproj is set, nout is overwritten with nhid
     # here, so the down projection below is nhid -> nhid and the decoder
     # reads nhid features. Confirm the original nout was not meant to be kept.
     nout = nhid if self.do_downproj else nout
     self.rnn = RNN(cell, ninp, nhid, nout, nlayers, dropouti, dropoutl, dropoutw, dropouto)
     # -- condition fusion
     if cond_fusion == 'h0':
         self.up_proj = nn.Linear(nz, nhid * nlayers)
     if cond_fusion == 'w0':
         self.up_proj = nn.Linear(nz, nwe)
     # -- down projection
     if self.do_downproj:
         self.downproj = nn.Linear(nhid, nout)
     # -- decoder
     self.decoder = nn.Linear(nout, ntoken)
     self.decoder.weight.data.uniform_(-0.1, 0.1)
     self.decoder.bias.data.zero_()
Beispiel #20
0
 def __init__(self, config, vocab_size, embed_matrix):
     """Create the embedding, encoder, composition and prediction layers.

     Args:
         config: Object providing ``embed_dim``, ``hid_dim``, ``dropout``
             and ``out_dim``.
         vocab_size: Number of rows in the embedding table.
         embed_matrix: Accepted but not used by any layer built here.
     """
     super(ESIM, self).__init__()
     hid = config.hid_dim

     # Input encoding
     self.embedding = nn.Embedding(vocab_size, config.embed_dim)
     self.enc_lstm = nn.LSTM(config.embed_dim,
                             hid,
                             batch_first=True,
                             dropout=config.dropout,
                             bidirectional=True)
     # Local inference modeling
     # self.attention()
     # Inference composition
     self.dense = nn.Sequential(nn.Linear(hid * 4, hid), nn.ReLU())
     self.infer_lstm = nn.LSTM(hid,
                               hid,
                               batch_first=True,
                               dropout=config.dropout,
                               bidirectional=True)
     # Prediction
     self.ave_pool = nn.AvgPool2d((3, hid * 2), (1, 0), padding=(1, 0))
     self.max_pool = nn.MaxPool2d((3, hid * 2), (1, 0), padding=(1, 0))
     self.MLP = nn.Sequential(
         nn.Dropout(),
         nn.Linear(hid * 2, hid * 4),
         nn.Tanh(),
         nn.Linear(hid * 4, config.out_dim),
         nn.Softmax())
Beispiel #21
0
    def __init__(self,
                 enc_dim=512,
                 lstm_memory=512,
                 attention_size=512,
                 emb_size=512,
                 vocab_size=100,
                 max_seqlen=20,
                 num_layers=1,
                 dropout_p=0.1
                 ):
        """Create the attention module, state initialisers, LSTM cell and output layer.

        Args:
            enc_dim: Width of the encoder features.
            lstm_memory: Hidden width of the LSTM cell.
            attention_size: Size handed to ``Attention``.
            emb_size: Word-embedding width.
            vocab_size: Vocabulary size; also the width of the output layer.
            max_seqlen: Stored on the instance for later use.
            num_layers: Stored on the instance for later use.
            dropout_p: Accepted but not used by any layer built here.
        """
        super(LSTMCaptioning, self).__init__()
        self.enc_dim = enc_dim
        self.lstm_memory = lstm_memory
        self.attention_size = attention_size
        self.emb_size = emb_size
        self.vocab_size = vocab_size
        self.max_seqlen = max_seqlen
        self.num_layers = num_layers

        self.attention = Attention(enc_dim, lstm_memory, attention_size)
        # Three projections from encoder features to the LSTM width.
        self.init_h = nn.Linear(enc_dim, lstm_memory)
        self.init_c = nn.Linear(enc_dim, lstm_memory)
        self.gate = nn.Linear(enc_dim, lstm_memory)
        self.emb = nn.Embedding(vocab_size, emb_size)
        # The cell input is emb_size + enc_dim wide.
        self.rnn = nn.LSTMCell(emb_size + enc_dim, lstm_memory)
        self.sigmoid = nn.Sigmoid()
        self.outlinear = nn.Linear(lstm_memory, vocab_size)
Beispiel #22
0
 def __init__(self):
     """Create two convolution layers followed by three linear layers."""
     super(Net, self).__init__()
     self.conv1 = nn.Conv2d(1, 6, 3)
     self.conv2 = nn.Conv2d(6, 16, 3)
     # 16 * 6 * 6 input features feed the first linear layer.
     self.fc1 = nn.Linear(16 * 6 * 6, 120)
     # The original used "-" here, so fc2 was never assigned.
     self.fc2 = nn.Linear(120, 84)
     self.fc3 = nn.Linear(84, 10)
 def __init__(self):
     """Create a conv/batch-norm/ReLU block and a linear layer."""
     # Must run before sub-modules are assigned when the class is an
     # nn.Module; harmless otherwise.
     super().__init__()
     self.net = nn.Sequential(
         nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1),
         nn.BatchNorm2d(32),
         nn.ReLU(),
     )
     self.fc = nn.Linear(32, 10)
Beispiel #24
0
    def __init__(self, num_layers, input_dim, hidden_dim, output_dim):
        '''
            num_layers: number of layers in the neural networks (EXCLUDING the input layer). If num_layers=1, this reduces to linear model.
            input_dim: dimensionality of input features
            hidden_dim: dimensionality of hidden units at ALL layers
            output_dim: number of classes for prediction

            Raises ValueError when num_layers is smaller than 1.
        '''

        super(MLP, self).__init__()

        self.num_layers = num_layers
        # True only for the single-layer (purely linear) model.
        self.linear_or_not = num_layers == 1

        if num_layers < 1:
            raise ValueError("number of layers should be positive!")

        if self.linear_or_not:
            self.linear = nn.Linear(input_dim, output_dim)
            return

        # Multi-layer model: input -> hidden x (num_layers - 1) -> output.
        self.linears = torch.nn.ModuleList()
        self.batch_norms = torch.nn.ModuleList()

        layer_dims = [input_dim] + [hidden_dim] * (num_layers - 1) + [output_dim]
        for fan_in, fan_out in zip(layer_dims, layer_dims[1:]):
            self.linears.append(nn.Linear(fan_in, fan_out))

        # One batch norm per hidden layer; none after the output layer.
        for _ in range(num_layers - 1):
            self.batch_norms.append(nn.BatchNorm1d(hidden_dim))
Beispiel #25
0
    def __init__(self, num_inputs, recurrent=False, grid=False, hidden_size=512):
        """Create the empty column containers and the critic head.

        Args:
            num_inputs: Stored on the instance for later use.
            recurrent: Passed to the base class and stored on the instance.
            grid: When true the critic head is a two-layer network with Tanh;
                otherwise it is a single linear layer.
            hidden_size: Width of the features fed to the critic head.
        """
        super(PNNConvBase, self).__init__(recurrent, hidden_size, hidden_size)
        self.columns = nn.ModuleList([])
        self.num_inputs = num_inputs
        self.hidden_size = hidden_size
        self.recurrent = recurrent
        self.alpha = nn.ModuleList([])
        self.V = nn.ModuleList([])
        self.U = nn.ModuleList([])
        self.flatten = Flatten()
        self.grid = grid

        # Passes each layer to the project's init helper with orthogonal
        # weight init and a zero-constant bias init.
        init_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.
                               constant_(x, 0))

        if grid:
            self.critic_linear = nn.Sequential(
                init_(nn.Linear(self.hidden_size, 64)),
                nn.Tanh(),
                init_(nn.Linear(64, 1))
            )
        else:
            self.critic_linear = init_(nn.Linear(self.hidden_size, 1))

        self.train()
        self.n_layers = 4
 def __init__(self,vocab_size,embedding_size,hidden_size,output_size,num_layers,p):
     """Create the embedding, the LSTM and the output projection.

     Args:
         vocab_size: Number of rows in the embedding table.
         embedding_size: Embedding width; also the LSTM input width.
         hidden_size: Hidden width of the LSTM.
         output_size: Width of the output projection.
         num_layers: Number of stacked LSTM layers.
         p: Dropout probability passed to the LSTM.
     """
     super(Decoder, self).__init__()
     self.hidden_size = hidden_size
     self.num_layers = num_layers

     self.embedding = nn.Embedding(vocab_size, embedding_size)
     self.rnn = nn.LSTM(embedding_size, hidden_size, num_layers, dropout=p)
     self.fc = nn.Linear(hidden_size, output_size)
Beispiel #27
0
	def __init__(self, batch_size,embed_size,hidden_size,num_class, n_layers=1 ,dropout=0.5):
		"""Create the LSTM and the linear classifier.

		Args:
			batch_size: Stored on the instance for later use.
			embed_size: Input width of the LSTM.
			hidden_size: Hidden width of the LSTM and classifier input width.
			num_class: Output width of the classifier.
			n_layers: Accepted but not used; the LSTM is built with one layer.
			dropout: Accepted but not used by any layer built here.
		"""
		super(LSTMGait,self).__init__()
		self.batch_size = batch_size
		self.embed_size = embed_size
		self.hidden_size = hidden_size
		self.num_class = num_class
		# The original read self.hidden_dim, which is never assigned.
		self.lstm = nn.LSTM(input_size=self.embed_size, hidden_size=self.hidden_size, num_layers=1, batch_first=True)
		self.classifier = nn.Linear(self.hidden_size, self.num_class)
def _conv_linear(args,
                 filter_size,
                 num_features,
                 bias,
                 bias_start=0.0,
                 scope=None):
    """Convolve one or more 4D tensors with a single filter, plus optional bias.

    Args:
        args: sequence of 4D tensors; it is indexed and measured with len(),
            so a bare tensor is not accepted. Their sizes along axis 3 are
            summed to get the filter's input depth.
        filter_size: int tuple of filter height and width.
        num_features: int, number of output features.
        bias: when falsy the convolution result is returned without a bias.
        bias_start: starting value to initialize the bias; 0 by default.
        scope: accepted for interface compatibility; not used.
    Returns:
        The convolution result, plus the bias term when ``bias`` is truthy.
    Raises:
        ValueError: if an argument is not 4D or has an empty/unknown axis 3.
    """
    # ``.shape`` replaces the original ``get_shape().as_lists()``;
    # ``as_lists`` is not a method of either torch or TensorFlow shapes.
    shapes = [list(a.shape) for a in args]

    # Total depth of the arguments along axis 3 (the axis concatenated below).
    total_arg_size_depth = 0
    for shape in shapes:
        # Check the rank of each shape, not the number of arguments.
        if len(shape) != 4:
            raise ValueError("Linear is expecting 4D arguments: %s" %
                             str(shapes))

        if not shape[3]:
            raise ValueError("Linear expects shape[4] of arguments: %s" %
                             str(shapes))

        total_arg_size_depth += shape[3]

    dtype = args[0].dtype

    # NOTE(review): the calls below mix TensorFlow-style usage (named
    # variables, ``strides``, padding='SAME') with torch names.
    # torch.autograd.Variable does not take a name and a shape, and
    # torch.nn.functional has ``conv2d`` (lower case) with different
    # arguments. Left as written; this needs a deliberate port to one
    # framework.
    matrix = Variable(
        "Matrix",
        [filter_size[0], filter_size[1], total_arg_size_depth, num_features],
        dtype=dtype)

    if len(args) == 1:
        res = F.Conv2d(args[0], matrix, strides=[1, 1, 1, 1], padding='SAME')

    else:
        res = F.Conv2d(torch.cat((args), 3),
                       matrix,
                       strides=[1, 1, 1, 1],
                       padding='SAME')

    if not bias:
        return res

    # NOTE(review): ``nn.linear`` is not a torch attribute and a layer is not
    # an initializer; presumably a constant initializer with value
    # bias_start was intended. Confirm when porting.
    bias_term = Variable("Bias", [num_features],
                         dtype=dtype,
                         initializer=nn.linear(bias_start, dtype=dtype))

    return res + bias_term
    def __init__(self, full_state_dim, full_action_dim, hidden_dim,
                 output_dim, seed=0, output_act=False):
        """Create the critic's linear and batch-norm layers.

        Args:
            full_state_dim: Input width of the first layer.
            full_action_dim: Extra input width added to the second layer.
            hidden_dim: Sequence of hidden widths; entries 0, 1 and -1 are read.
            output_dim: Width of the final layer's output.
            seed: Value passed to ``torch.manual_seed``.
            output_act: Stored as ``self.out_act`` for later use.
        """
        super(Critic, self).__init__()
        self.seed = torch.manual_seed(seed)

        self.fc1 = nn.Linear(full_state_dim, hidden_dim[0])
        self.bn1 = nn.BatchNorm1d(hidden_dim[0])

        # The second layer is widened by full_action_dim input features.
        self.fc2 = nn.Linear(hidden_dim[0] + full_action_dim,
                             hidden_dim[1])
        self.bn2 = nn.BatchNorm1d(hidden_dim[1])

        self.fc3 = nn.Linear(hidden_dim[-1], output_dim)

        self.activation = f.leaky_relu
        self.out_act = output_act

        self.reset_parameters()
Beispiel #30
0
	def __init__(self, hidden_size, output_size):
		"""Create the embedding, GRU, output projection and log-softmax.

		Args:
			hidden_size: Embedding width and GRU input/hidden width.
			output_size: Number of embedding rows and width of the projection.
		"""
		super(DecoderRNN, self).__init__()

		self.hidden_size = hidden_size
		self.output_size = output_size

		# nn.Embedding takes (num_embeddings, embedding_dim). The original
		# had the two swapped, producing output_size-wide vectors for a GRU
		# that expects hidden_size-wide input.
		self.embedding = nn.Embedding(output_size, hidden_size)
		self.gru = nn.GRU(hidden_size, hidden_size)
		# NOTE(review): projects to output_size so the log-softmax covers
		# the same index range as the embedding table; confirm against
		# forward().
		self.linear = nn.Linear(hidden_size, output_size)
		self.softmax = nn.LogSoftmax(dim = 1)