class lstmwrapper(nn.Module):
    def __init__(self, input_size=66529, output_size=5952, hidden_size=52,
                 num_layers=16, batch_first=True, dropout=0.1):
        super(lstmwrapper, self).__init__()
        self.lstm = LSTM(input_size=input_size, hidden_size=hidden_size,
                         num_layers=num_layers, batch_first=batch_first,
                         dropout=dropout)
        self.output = nn.Linear(hidden_size, output_size)
        self.bn = nn.BatchNorm1d(input_size)
        self.reset_parameters()

    def reset_parameters(self):
        self.lstm.reset_parameters()
        self.output.reset_parameters()

    def forward(self, input, hx=None):
        # BatchNorm1d expects (batch, features, seq_len), so move the feature
        # axis into channel position before normalizing, then restore the
        # batch_first layout for the LSTM (same pattern as lstmwrapperJ below).
        input = self.bn(input.permute(0, 2, 1)).permute(0, 2, 1).contiguous()
        output, statetuple = self.lstm(input, hx)
        return self.output(output)
class LSTM_vocab(nn.Module):
    def __init__(self, vocab_size=50000, vocab_embed_d=512, output_size=12,
                 hidden_size=256, *args, **kwargs):
        super(LSTM_vocab, self).__init__()
        self.src_word_emb = nn.Embedding(vocab_size, vocab_embed_d, padding_idx=0)
        self.lstm = LSTM(input_size=vocab_embed_d, hidden_size=hidden_size,
                         *args, **kwargs)
        self.output = nn.Linear(hidden_size, output_size)
        self.reset_parameters()

    def reset_parameters(self):
        self.lstm.reset_parameters()
        self.output.reset_parameters()

    def forward(self, input, hx=None):
        input = self.src_word_emb(input)
        output, statetuple = self.lstm(input, hx)
        # This is a design decision that can be experimented with:
        # project every timestep, then keep only the last one.
        output = self.output(output)
        # output = torch.max(output, dim=1)[0]
        output = output[:, -1, :]
        return output
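# A minimal usage sketch for LSTM_vocab (hyperparameters and shapes here are
# illustrative assumptions; `LSTM` is assumed to follow the torch.nn.LSTM
# interface): a padded batch of token ids maps to one logit vector per
# sequence, taken from the last timestep.
def _demo_lstm_vocab():
    model = LSTM_vocab(num_layers=2, batch_first=True)
    tokens = torch.randint(1, 50000, (4, 20))  # (batch, seq_len); 0 is padding_idx
    logits = model(tokens)                     # (batch, output_size)
    assert logits.shape == (4, 12)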
def testbid():
    lstm = LSTM(input_size=100, hidden_size=77, batch_first=True, dropout=0.1)
    lstmbi = LSTM(input_size=100, hidden_size=77, batch_first=True,
                  dropout=0.1, bidirectional=True)
    # torch.randn rather than torch.Tensor, so the input is initialized.
    input = Variable(torch.randn(64, 8, 100))
    output = lstm(input, None)
    outputbi = lstmbi(input, None)
    print(output[0].shape, output[1][0].shape, output[1][1].shape)
    print(outputbi[0].shape, outputbi[1][0].shape, outputbi[1][1].shape)
    print("done")
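# For reference, the shapes testbid() should print, assuming `LSTM` follows
# the torch.nn.LSTM interface with batch_first=True:
#   output:   (64, 8, 77) unidirectional; (64, 8, 154) bidirectional (2 * hidden_size)
#   h_n, c_n: (1, 64, 77) unidirectional; (2, 64, 77)  bidirectional (num_layers * num_directions)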
# Class header assumed from the super() call; only __init__ is shown in this
# snippet.
class HandwritingGenerator(Module):
    def __init__(self, hidden_size, num_window, num_mixture_components):
        super(HandwritingGenerator, self).__init__()
        # First LSTM layer, takes as input a tuple (x, y, eol)
        self.lstm1_layer = LSTM(input_size=3, hidden_size=hidden_size,
                                batch_first=True)
        # self.window_layer = Attention(n_inputs=hidden_size,
        #                               n_mixture_components=num_window)
        self.window_layer = GaussianWindow(input_size=hidden_size,
                                           num_components=num_mixture_components)
        self.mdn = mdn(n_inputs=hidden_size,
                       n_mixture_components=num_mixture_components)
        # Hidden state variables
        self.hidden_size = hidden_size
        self.hidden1 = None
        self.prev_kappa = None
        # Initialize parameters
        self.reset_parameters()
# Class header assumed from the super() call; only __init__ is shown in this
# snippet.
class ATAELSTM(nn.Module):
    def __init__(self, input_size, embed_size, hidden_size, aspect_size,
                 num_class, embedding=None):
        super(ATAELSTM, self).__init__()
        self.embed_size = embed_size
        self.aspect_size = aspect_size
        self.num_class = num_class
        # Embedding: frozen pretrained weights if given, trainable otherwise.
        if embedding is not None:
            self.embeding = Embedding.from_pretrained(torch.Tensor(embedding))
            self.embeding.weight.requires_grad = False
        else:
            self.embeding = Embedding(input_size, embed_size, padding_idx=0)
        # (batch size, N, embedding size)
        self.apect_embeding = Embedding(aspect_size, embed_size)
        self.rnn = LSTM(input_size=embed_size, hidden_size=hidden_size,
                        bidirectional=True, batch_first=True, num_layers=1)
        self.att = Attention(hidden_size * 2, aspect_size)
        self.fc = Linear(hidden_size * 2, num_class, bias=True)
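# A hedged construction sketch for ATAELSTM (the forward pass is not part of
# the snippet above, so only instantiation is shown; the pretrained matrix is
# a hypothetical stand-in):
def _demo_atae_lstm():
    pretrained = np.random.randn(5000, 300)  # (vocab_size, embed_size), assumed
    model = ATAELSTM(input_size=5000, embed_size=300, hidden_size=128,
                     aspect_size=10, num_class=3, embedding=pretrained)
    # The bidirectional LSTM doubles the feature size, hence hidden_size * 2.
    assert model.fc.in_features == 128 * 2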
class LSTMWrapper(nn.Module):
    def __init__(self, output_size=12, hidden_size=256, *args, **kwargs):
        super(LSTMWrapper, self).__init__()
        self.lstm = LSTM(hidden_size=hidden_size, *args, **kwargs)
        self.output = nn.Linear(hidden_size, output_size)
        self.reset_parameters()

    def reset_parameters(self):
        self.lstm.reset_parameters()
        self.output.reset_parameters()

    def forward(self, input, hx=None):
        output, statetuple = self.lstm(input, hx)
        # This is a design decision that can be experimented with:
        # project every timestep, then keep only the last one.
        output = self.output(output)
        # output = torch.max(output, dim=1)[0]
        output = output[:, -1, :]
        return output
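# The two pooling choices hinted at above, side by side (illustrative only):
# taking the last timestep assumes the final hidden state summarizes the
# sequence; max-pooling over time is order-insensitive per feature.
def _demo_pooling_choices(output):
    # output: (batch, seq_len, output_size), e.g. from self.output(lstm_out)
    last = output[:, -1, :]                 # (batch, output_size)
    maxpool = torch.max(output, dim=1)[0]   # (batch, output_size)
    return last, maxpool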
# Class header assumed from the super() call; only __init__ is shown in this
# snippet.
class ClassifierLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, label_size):
        super(ClassifierLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.label_size = label_size
        self.input_size = input_size
        self.lstm = LSTM(input_size, hidden_size)
        self.linear = nn.Linear(hidden_size, label_size)
class lstmwrapperJ(nn.Module):
    def __init__(self, input_size=52686, output_size=2976, hidden_size=128,
                 num_layers=16, batch_first=True, dropout=0.1):
        super(lstmwrapperJ, self).__init__()
        self.lstm = LSTM(input_size=input_size, hidden_size=hidden_size,
                         num_layers=num_layers, batch_first=batch_first,
                         dropout=dropout)
        self.bn = nn.BatchNorm1d(input_size)
        self.output = nn.Linear(hidden_size, output_size)
        self.reset_parameters()
        for name, param in self.named_parameters():
            print(name, param.data.shape)

    def reset_parameters(self):
        self.lstm.reset_parameters()
        self.output.reset_parameters()

    def forward(self, input, hx=None):
        # BatchNorm1d wants (batch, features, seq_len); the LSTM wants
        # (batch, seq_len, features) with batch_first=True.
        input = input.permute(0, 2, 1).contiguous()
        try:
            bnout = self.bn(input)
            # Zero out any NaNs produced by the normalization.
            bnout[(bnout != bnout).detach()] = 0
        except ValueError:
            # BatchNorm1d cannot compute batch statistics on a single sample.
            if input.shape[0] == 1:
                print("Somehow the batch size is one for this input")
                bnout = input
            else:
                raise
        input = bnout.permute(0, 2, 1).contiguous()
        output, statetuple = self.lstm(input, hx)
        output = self.output(output)  # (batch_size, seq_len, target_dim)
        # pdb.set_trace()
        # output = output.sum(1)
        output = output.max(1)[0]
        return output
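# Why the permute dance above: nn.BatchNorm1d normalizes the channel dimension
# C of an (N, C, L) tensor, while a batch_first LSTM consumes (N, L, C). A
# small self-contained illustration (feature size shrunk for the demo):
def _demo_batchnorm_permute():
    bn = nn.BatchNorm1d(64)
    x = torch.randn(4, 10, 64)                   # (batch, seq_len, features)
    y = bn(x.permute(0, 2, 1)).permute(0, 2, 1)  # normalize per feature, restore layout
    assert y.shape == x.shape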
class Stock_LSTM(nn.Module):
    """
    I prefer using this stock LSTM for numerical stability.
    """

    def __init__(self, x, R, W, h, L, v_t):
        super(Stock_LSTM, self).__init__()
        self.x = x
        self.R = R
        self.W = W
        self.h = h
        self.L = L
        self.v_t = v_t
        self.LSTM = LSTM(input_size=self.x + self.R * self.W, hidden_size=h,
                         num_layers=L, batch_first=True, dropout=0.1)
        self.last = nn.Linear(self.h, self.v_t)
        self.st = None

    def forward(self, input_x):
        """
        :param input_x: input and memory values
        :return:
        """
        assert (self.st is not None)
        o, st = self.LSTM(input_x, self.st)
        if (st[0] != st[0]).any():
            # Dump the module and the offending input for post-mortem debugging.
            with open("debug/lstm.pkl", "wb") as f:
                pickle.dump(self, f)
                pickle.dump(input_x, f)
            raise ValueError("LSTM produced a NaN, objects dumped.")
        return self.last(o), st

    def reset_parameters(self):
        self.LSTM.reset_parameters()
        self.last.reset_parameters()

    def assign_states_tuple(self, states_tuple):
        self.st = states_tuple
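# Usage sketch for Stock_LSTM (hyperparameters are illustrative): the caller
# must seed the recurrent state via assign_states_tuple() before forward(),
# since forward() asserts that self.st is set.
def _demo_stock_lstm():
    m = Stock_LSTM(x=16, R=2, W=8, h=32, L=1, v_t=4)
    batch = 3
    h0 = torch.zeros(m.L, batch, m.h)
    c0 = torch.zeros(m.L, batch, m.h)
    m.assign_states_tuple((h0, c0))
    inp = torch.randn(batch, 5, m.x + m.R * m.W)  # (batch, seq, x + R*W)
    out, st = m(inp)                              # out: (batch, seq, v_t)
    assert out.shape == (3, 5, 4)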
# Class header assumed from the super() call (the original base class may be a
# framework-specific Model); only __init__ is shown in this snippet.
class SimpleTagger(nn.Module):
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 hidden_size: int = 200,
                 num_layers: int = 2) -> None:
        super(SimpleTagger, self).__init__()
        self.vocab = vocab
        self.text_field_embedder = text_field_embedder
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.num_classes = self.vocab.get_vocab_size("tags")
        # TODO(Mark): support masking once utility functions are merged.
        self.stacked_encoders = LSTM(self.text_field_embedder.get_output_dim(),
                                     self.hidden_size, self.num_layers,
                                     batch_first=True)
        self.tag_projection_layer = TimeDistributed(
            Linear(self.hidden_size, self.num_classes))
        self.sequence_loss = torch.nn.CrossEntropyLoss()
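# Sketch of how the pieces above typically combine (assumed, since the forward
# pass is not part of the snippet): project each timestep to tag logits, then
# flatten both logits and gold tags so CrossEntropyLoss sees (N, C) vs (N,).
def _demo_sequence_loss():
    loss_fn = torch.nn.CrossEntropyLoss()
    logits = torch.randn(4, 7, 12)        # (batch, seq_len, num_classes)
    tags = torch.randint(0, 12, (4, 7))   # (batch, seq_len)
    flat_logits = logits.view(4 * 7, 12)  # (N, C)
    flat_tags = tags.view(4 * 7)          # (N,)
    return loss_fn(flat_logits, flat_tags)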
class PriorLSTM(nn.Module):
    def __init__(self, prior, input_size=52686, output_size=2976,
                 hidden_size=128, num_layers=16, batch_first=True, dropout=0.1):
        super(PriorLSTM, self).__init__()
        self.lstm = LSTM(input_size=input_size, hidden_size=hidden_size,
                         num_layers=num_layers, batch_first=batch_first,
                         dropout=dropout)
        self.bn = nn.BatchNorm1d(input_size)
        self.output = nn.Linear(hidden_size, output_size)
        self.reset_parameters()

        # Prior: the prior probability of each label being true; this is
        # added to the logit.
        self.prior = prior
        if isinstance(self.prior, np.ndarray):
            self.prior = torch.from_numpy(self.prior).float()
            self.prior = Variable(self.prior, requires_grad=False)
        elif isinstance(self.prior, torch.Tensor):
            self.prior = Variable(self.prior, requires_grad=False)
        else:
            assert (isinstance(self.prior, Variable))

        # Transform to logits. Because we are using sigmoid, not softmax,
        # self.prior = log(P(y)) - log(P(not y)).
        # sigmoid_input = z + self.prior, where z = log(P(x|y)) - log(P(x|not y)),
        # so the sigmoid output is the positive posterior.
        self.prior = self.prior.clamp(1e-8, 1 - 1e-8)
        self.prior = torch.log(self.prior) - torch.log(1 - self.prior)
        a = Variable(torch.Tensor([0]))
        self.prior = torch.cat((a, self.prior))
        self.prior = self.prior.cuda()

        for name, param in self.named_parameters():
            print(name, param.data.shape)
        print("Using prior lstm")

    def reset_parameters(self):
        self.lstm.reset_parameters()
        self.output.reset_parameters()

    def forward(self, input, hx=None):
        # BatchNorm1d wants (batch, features, seq_len); restore the
        # batch_first layout for the LSTM afterwards.
        input = input.permute(0, 2, 1).contiguous()
        bnout = self.bn(input)
        # Zero out any NaNs produced by the normalization.
        bnout[(bnout != bnout).detach()] = 0
        input = bnout.permute(0, 2, 1).contiguous()
        output, statetuple = self.lstm(input, hx)
        output = self.output(output)  # (batch_size, seq_len, target_dim)
        # output = output.sum(1)
        output = output.max(1)[0]
        output = output + self.prior
        return output
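# A small worked example of the prior-to-logit transform above: for a label
# with prior P(y) = 0.9, the added logit is log(0.9) - log(0.1) ≈ +2.197, and
# sigmoid of that logit recovers the prior exactly when z = 0.
def _demo_prior_logit():
    p = torch.tensor([0.9, 0.5, 0.01]).clamp(1e-8, 1 - 1e-8)
    logit = torch.log(p) - torch.log(1 - p)  # ≈ [2.197, 0.0, -4.595]
    assert torch.allclose(torch.sigmoid(logit), p, atol=1e-4)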
class HandwritingGenerator(Module):
    def __init__(self, alphabet_size, hidden_size, num_window_components,
                 num_mixture_components):
        super(HandwritingGenerator, self).__init__()
        self.alphabet_size = alphabet_size
        self.hidden_size = hidden_size
        self.num_window_components = num_window_components
        self.num_mixture_components = num_mixture_components
        self.input_size = input_size = 3
        n_heads_1 = 2
        n_heads_2 = 10
        query_dimensions = 1
        self.n_pre_layers = 2
        self.n_layers = 4

        # First LSTM layer, takes as input a tuple (x, y, eol)
        self.lstm1_layer = LSTM(input_size=input_size, hidden_size=hidden_size,
                                batch_first=True)
        # An experimental pre-encoder that was tried in place of the first LSTM:
        # self.transformers1_layers = [
        #     RecurrentTransformerEncoderLayer(
        #         RecurrentAttentionLayer(RecurrentLinearAttention(query_dimensions),
        #                                 input_size, n_heads_1),
        #         input_size,
        #         hidden_size,
        #         activation="gelu")
        #     for l in range(self.n_pre_layers)
        # ]
        # self.norm1_layer = torch.nn.Linear(input_size, hidden_size)

        # Gaussian Window layer
        self.window_layer = GaussianWindow(input_size=hidden_size,
                                           num_components=num_window_components)

        # Second block, replacing the second and third LSTM layers of the
        # original model: recurrent transformer layers over the concatenation
        # of the input, the output of the first LSTM layer, and the output of
        # the Window layer. Wrapped in nn.ModuleList (rather than a plain
        # Python list) so the sublayers are registered and their parameters
        # appear in .parameters().
        self.transformers2_layers = torch.nn.ModuleList([
            RecurrentTransformerEncoderLayer(
                RecurrentAttentionLayer(RecurrentLinearAttention(query_dimensions),
                                        3 + hidden_size + alphabet_size + 1,
                                        n_heads_2),
                3 + hidden_size + alphabet_size + 1,
                hidden_size,
                activation="gelu")
            for l in range(self.n_layers)
        ])

        self.norm2_layer = torch.nn.LayerNorm(3 + hidden_size + alphabet_size + 1)

        # Mixture Density Network layer
        self.output_layer = MDN(input_size=3 + hidden_size + alphabet_size + 1,
                                num_mixtures=num_mixture_components)

        # Hidden state variables
        self.prev_kappa = None
        self.hidden1 = None
        self.hidden2 = [None] * self.n_layers

        # Initialize parameters
        self.reset_parameters()

    def forward(self, strokes, onehot, bias=None):
        # First LSTM layer
        input_ = strokes
        self.lstm1_layer.flatten_parameters()
        output1, self.hidden1 = self.lstm1_layer(input_, self.hidden1)

        # Gaussian Window layer
        window, self.prev_kappa, phi = self.window_layer(output1, onehot,
                                                         self.prev_kappa)

        # The recurrent transformer layers consume one timestep at a time, so
        # flatten (batch, 1, features) down to (batch, features).
        output2 = torch.cat((strokes, output1, window), dim=2).reshape(
            -1, strokes.shape[-1] + output1.shape[-1] + window.shape[-1])
        for i, l in enumerate(self.transformers2_layers):
            output2, self.hidden2[i] = l(output2, self.hidden2[i])

        output3 = self.norm2_layer(output2)

        # MDN layer
        eos, pi, mu1, mu2, sigma1, sigma2, rho = self.output_layer(
            output3.reshape(-1, 1, output3.shape[-1]), bias)
        return (eos, pi, mu1, mu2, sigma1, sigma2, rho), (window, phi)

    @staticmethod
    def sample_bivariate_gaussian(pi, mu1, mu2, sigma1, sigma2, rho):
        # Pick a component from the MDN, then sample from that bivariate
        # Gaussian.
        p = pi.data[0, 0, :].numpy()
        idx = np.random.choice(p.shape[0], p=p)
        m1 = mu1.data[0, 0, idx]
        m2 = mu2.data[0, 0, idx]
        s1 = sigma1.data[0, 0, idx]
        s2 = sigma2.data[0, 0, idx]
        r = rho.data[0, 0, idx]
        mean = [m1, m2]
        covariance = [[s1**2, r * s1 * s2],
                      [r * s1 * s2, s2**2]]
        Z = torch.autograd.Variable(
            sigma1.data.new(np.random.multivariate_normal(mean, covariance,
                                                          1))).unsqueeze(0)
        X = Z[:, :, 0:1]
        Y = Z[:, :, 1:2]
        return X, Y

    def reset_state(self):
        self.prev_kappa = None
        self.hidden1 = None
        self.hidden2 = [None] * self.n_layers

    def reset_parameters(self):
        for parameter in self.parameters():
            if len(parameter.size()) == 2:
                torch.nn.init.xavier_uniform_(parameter, gain=1.0)
            else:
                stdv = 1.0 / parameter.size(0)
                torch.nn.init.uniform_(parameter, -stdv, stdv)

    def num_parameters(self):
        num = 0
        for weight in self.parameters():
            num = num + weight.numel()
        return num

    @classmethod
    def load_model(cls, parameters: dict, state_dict: dict):
        model = cls(**parameters)
        model.load_state_dict(state_dict)
        return model

    def __deepcopy__(self, *args, **kwargs):
        model = HandwritingGenerator(
            self.alphabet_size,
            self.hidden_size,
            self.num_window_components,
            self.num_mixture_components,
        )
        return model
class HandwritingGenerator(Module):
    def __init__(self, alphabet_size, hidden_size, num_window_components,
                 num_mixture_components):
        super(HandwritingGenerator, self).__init__()
        self.alphabet_size = alphabet_size
        self.hidden_size = hidden_size
        self.num_window_components = num_window_components
        self.num_mixture_components = num_mixture_components
        # First LSTM layer, takes as input a tuple (x, y, eol)
        self.lstm1_layer = LSTM(input_size=3, hidden_size=hidden_size,
                                batch_first=True)
        # Gaussian Window layer
        self.window_layer = GaussianWindow(input_size=hidden_size,
                                           num_components=num_window_components)
        # Second LSTM layer, takes as input the concatenation of the input,
        # the output of the first LSTM layer,
        # and the output of the Window layer
        self.lstm2_layer = LSTM(
            input_size=3 + hidden_size + alphabet_size + 1,
            hidden_size=hidden_size,
            batch_first=True,
        )
        # Third LSTM layer, takes as input the output of the second LSTM layer
        self.lstm3_layer = LSTM(input_size=hidden_size, hidden_size=hidden_size,
                                batch_first=True)
        # Mixture Density Network layer
        self.output_layer = MDN(input_size=hidden_size,
                                num_mixtures=num_mixture_components)
        # Hidden state variables
        self.prev_kappa = None
        self.hidden1 = None
        self.hidden2 = None
        self.hidden3 = None
        # Initialize parameters
        self.reset_parameters()

    def forward(self, strokes, onehot, bias=None):
        # First LSTM layer
        input_ = strokes
        self.lstm1_layer.flatten_parameters()
        output1, self.hidden1 = self.lstm1_layer(input_, self.hidden1)
        # Gaussian Window layer
        window, self.prev_kappa, phi = self.window_layer(output1, onehot,
                                                         self.prev_kappa)
        # Second LSTM layer
        output2, self.hidden2 = self.lstm2_layer(
            torch.cat((strokes, output1, window), dim=2), self.hidden2)
        # Third LSTM layer
        output3, self.hidden3 = self.lstm3_layer(output2, self.hidden3)
        # MDN layer
        eos, pi, mu1, mu2, sigma1, sigma2, rho = self.output_layer(output3, bias)
        return (eos, pi, mu1, mu2, sigma1, sigma2, rho), (window, phi)

    @staticmethod
    def sample_bivariate_gaussian(pi, mu1, mu2, sigma1, sigma2, rho):
        # Pick a component from the MDN, then sample from that bivariate
        # Gaussian.
        p = pi.data[0, 0, :].numpy()
        idx = np.random.choice(p.shape[0], p=p)
        m1 = mu1.data[0, 0, idx]
        m2 = mu2.data[0, 0, idx]
        s1 = sigma1.data[0, 0, idx]
        s2 = sigma2.data[0, 0, idx]
        r = rho.data[0, 0, idx]
        mean = [m1, m2]
        covariance = [[s1**2, r * s1 * s2],
                      [r * s1 * s2, s2**2]]
        Z = torch.autograd.Variable(
            sigma1.data.new(np.random.multivariate_normal(mean, covariance,
                                                          1))).unsqueeze(0)
        X = Z[:, :, 0:1]
        Y = Z[:, :, 1:2]
        return X, Y

    def reset_state(self):
        self.prev_kappa = None
        self.hidden1 = None
        self.hidden2 = None
        self.hidden3 = None

    def reset_parameters(self):
        # Use the current in-place init functions (xavier_uniform/uniform are
        # deprecated).
        for parameter in self.parameters():
            if len(parameter.size()) == 2:
                torch.nn.init.xavier_uniform_(parameter, gain=1.0)
            else:
                stdv = 1.0 / parameter.size(0)
                torch.nn.init.uniform_(parameter, -stdv, stdv)

    def num_parameters(self):
        num = 0
        for weight in self.parameters():
            num = num + weight.numel()
        return num

    @classmethod
    def load_model(cls, parameters: dict, state_dict: dict):
        model = cls(**parameters)
        model.load_state_dict(state_dict)
        return model

    def __deepcopy__(self, *args, **kwargs):
        model = HandwritingGenerator(
            self.alphabet_size,
            self.hidden_size,
            self.num_window_components,
            self.num_mixture_components,
        )
        return model
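# Hedged sampling sketch for the generator above (assumes a CPU model, since
# sample_bivariate_gaussian calls .numpy(); the 0.5 eos threshold and the step
# count are assumptions): feed one stroke at a time, let the module carry its
# hidden state, and draw the next pen offset from the returned mixture.
def _demo_generate(model, onehot, steps=100):
    # onehot: one-hot encoded text for the Gaussian window, batch size 1.
    model.reset_state()
    stroke = torch.zeros(1, 1, 3)  # (batch=1, seq=1, (x, y, eol))
    points = []
    for _ in range(steps):
        (eos, pi, mu1, mu2, s1, s2, rho), _ = model(stroke, onehot)
        x, y = model.sample_bivariate_gaussian(pi, mu1, mu2, s1, s2, rho)
        eol = (eos.data[0, 0, 0] > 0.5).float().view(1, 1, 1)
        stroke = torch.cat((x, y, eol), dim=2)
        points.append(stroke)
    return torch.cat(points, dim=1)  # (1, steps, 3)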