def learn_distribution(
    distr_output: DistributionOutput,
    samples: torch.Tensor,
    init_biases: List[np.ndarray] = None,
    num_epochs: int = 5,
    learning_rate: float = 1e-2,
):
    """Fit the parameters of `distr_output` to `samples` by SGD on the negative
    log-likelihood, then report sample statistics and the 10th/90th percentiles
    of the fitted distribution."""
    arg_proj = distr_output.get_args_proj(in_features=1)

    if init_biases is not None:
        for param, bias in zip(arg_proj.proj, init_biases):
            nn.init.constant_(param.bias, bias)

    dummy_data = torch.ones((len(samples), 1, 1))

    dataset = TensorDataset(dummy_data, samples)
    train_data = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)

    optimizer = SGD(arg_proj.parameters(), lr=learning_rate)

    for e in range(num_epochs):
        cumulative_loss = 0
        num_batches = 0
        for i, (data, sample_label) in enumerate(train_data):
            optimizer.zero_grad()
            distr_args = arg_proj(data)
            distr = distr_output.distribution(distr_args)
            loss = -distr.log_prob(sample_label).mean()
            loss.backward()
            clip_grad_norm_(arg_proj.parameters(), 10.0)
            optimizer.step()

            num_batches += 1
            cumulative_loss += loss.item()
        print("Epoch %s, loss: %s" % (e, cumulative_loss / num_batches))

    # Draw one batch, instantiate the fitted distribution and sample from it.
    sampling_dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)
    i, (data, sample_label) = next(enumerate(sampling_dataloader))
    distr_args = arg_proj(data)
    distr = distr_output.distribution(distr_args)
    samples = distr.sample((NUM_SAMPLES,))

    with torch.no_grad():
        percentile_90 = distr.quantile_function(
            torch.ones((1, 1, 1)), torch.ones((1, 1)) * 0.9
        )
        percentile_10 = distr.quantile_function(
            torch.ones((1, 1, 1)), torch.ones((1, 1)) * 0.1
        )

    return samples.mean(), samples.std(), percentile_10, percentile_90
def maximum_likelihood_estimate_sgd(
    distr_output: DistributionOutput,
    samples: torch.Tensor,
    init_biases: List[np.ndarray] = None,
    num_epochs: int = 5,
    learning_rate: float = 1e-2,
):
    """Fit the parameters of `distr_output` to `samples` by SGD on the negative
    log-likelihood and return the fitted parameters as numpy arrays."""
    arg_proj = distr_output.get_args_proj(in_features=1)

    if init_biases is not None:
        for param, bias in zip(arg_proj.proj, init_biases):
            nn.init.constant_(param.bias, bias)

    dummy_data = torch.ones((len(samples), 1))

    dataset = TensorDataset(dummy_data, samples)
    train_data = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)

    optimizer = SGD(arg_proj.parameters(), lr=learning_rate)

    for e in range(num_epochs):
        cumulative_loss = 0
        num_batches = 0
        for i, (data, sample_label) in enumerate(train_data):
            optimizer.zero_grad()
            distr_args = arg_proj(data)
            distr = distr_output.distribution(distr_args)
            loss = -distr.log_prob(sample_label).mean()
            loss.backward()
            clip_grad_norm_(arg_proj.parameters(), 10.0)
            optimizer.step()

            num_batches += 1
            cumulative_loss += loss.item()
        print("Epoch %s, loss: %s" % (e, cumulative_loss / num_batches))

    if len(distr_args[0].shape) == 1:
        # Scalar parameters: project a single dummy input and return them as-is.
        return [
            param.detach().numpy() for param in arg_proj(torch.ones((1, 1)))
        ]
    # Parameters with an event dimension: drop the dummy batch axis.
    return [
        param[0].detach().numpy() for param in arg_proj(torch.ones((1, 1)))
    ]
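# ---------------------------------------------------------------------------
# Usage sketch (illustrative, not part of the original code). It assumes that
# some concrete two-parameter DistributionOutput (e.g. a Gaussian output class
# from the surrounding library) is passed in as `normal_output`, and that
# BATCH_SIZE is the module-level constant used above.
# ---------------------------------------------------------------------------
def _example_fit_gaussian(normal_output: DistributionOutput) -> None:
    # Draw synthetic samples from a known Gaussian ...
    true_loc, true_scale = 2.0, 0.5
    samples = torch.distributions.Normal(true_loc, true_scale).sample((5000,))
    # ... and recover the parameters by minimizing the negative log-likelihood.
    # For a two-parameter output the returned list holds two numpy arrays.
    loc_hat, scale_hat = maximum_likelihood_estimate_sgd(
        normal_output, samples, num_epochs=10
    )
    print("estimated loc:", loc_hat, "estimated scale:", scale_hat)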
def __init__(
    self,
    input_size: int,
    num_layers: int,
    num_cells: int,
    cell_type: str,
    history_length: int,
    context_length: int,
    prediction_length: int,
    distr_output: DistributionOutput,
    dropout_rate: float,
    cardinality: List[int],
    embedding_dimension: List[int],
    lags_seq: List[int],
    scaling: bool = True,
    dtype: np.dtype = np.float32,
) -> None:
    super().__init__()

    self.num_layers = num_layers
    self.num_cells = num_cells
    self.cell_type = cell_type
    self.history_length = history_length
    self.context_length = context_length
    self.prediction_length = prediction_length
    self.dropout_rate = dropout_rate
    self.cardinality = cardinality
    self.embedding_dimension = embedding_dimension
    self.num_cat = len(cardinality)
    self.scaling = scaling
    self.dtype = dtype

    self.lags_seq = lags_seq

    self.distr_output = distr_output
    rnn = {"LSTM": nn.LSTM, "GRU": nn.GRU}[self.cell_type]
    self.rnn = rnn(
        input_size=input_size,
        hidden_size=num_cells,
        num_layers=num_layers,
        dropout=dropout_rate,
        batch_first=True,
    )

    self.target_shape = distr_output.event_shape

    self.proj_distr_args = distr_output.get_args_proj(num_cells)

    self.embedder = FeatureEmbedder(
        cardinalities=cardinality, embedding_dims=embedding_dimension
    )

    if scaling:
        self.scaler = MeanScaler(keepdim=True)
    else:
        self.scaler = NOPScaler(keepdim=True)
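# ---------------------------------------------------------------------------
# Construction sketch (illustrative, with assumed values). `network_cls` stands
# in for whatever class owns the __init__ above; input_size, the lag set and
# the categorical cardinalities are placeholders and must match the features
# the surrounding training/prediction code actually assembles per time step.
# ---------------------------------------------------------------------------
def _example_build_rnn_network(network_cls, distr_output: DistributionOutput):
    return network_cls(
        input_size=19,                 # assumed per-step feature size
        num_layers=2,
        num_cells=40,
        cell_type="LSTM",              # or "GRU"
        history_length=24 + 168,       # typically context_length + max(lags_seq)
        context_length=24,
        prediction_length=24,
        distr_output=distr_output,     # any DistributionOutput instance
        dropout_rate=0.1,
        cardinality=[5],               # one categorical feature with 5 values
        embedding_dimension=[3],
        lags_seq=[1, 24, 168],
        scaling=True,
    )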
def __init__(
    self,
    input_size: int,
    num_layers: int,
    num_cells: int,
    cell_type: str,
    history_length: int,
    context_length: int,
    prediction_length: int,
    distr_output: DistributionOutput,
    dropout_rate: float,
    lags_seq: List[int],
    target_dim: int,
    conditioning_length: int,
    cardinality: List[int] = [1],
    embedding_dimension: int = 1,
    scaling: bool = True,
    **kwargs,
) -> None:
    super().__init__(**kwargs)

    self.num_layers = num_layers
    self.num_cells = num_cells
    self.cell_type = cell_type
    self.history_length = history_length
    self.context_length = context_length
    self.prediction_length = prediction_length
    self.dropout_rate = dropout_rate
    self.cardinality = cardinality
    self.embedding_dimension = embedding_dimension
    self.num_cat = len(cardinality)
    self.target_dim = target_dim
    self.scaling = scaling
    self.target_dim_sample = target_dim
    self.conditioning_length = conditioning_length

    assert len(set(lags_seq)) == len(lags_seq), "no duplicated lags allowed!"
    lags_seq.sort()
    self.lags_seq = lags_seq

    self.distr_output = distr_output

    rnn = {"LSTM": nn.LSTM, "GRU": nn.GRU}[self.cell_type]
    self.rnn = rnn(
        input_size=input_size,
        hidden_size=num_cells,
        num_layers=num_layers,
        dropout=dropout_rate,
        batch_first=True,
    )

    self.proj_dist_args = distr_output.get_args_proj(num_cells)

    self.embed_dim = 1
    self.embed = nn.Embedding(
        num_embeddings=self.target_dim, embedding_dim=self.embed_dim
    )

    if scaling:
        self.scaler = MeanScaler(keepdim=True)
    else:
        self.scaler = NOPScaler(keepdim=True)
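# ---------------------------------------------------------------------------
# Shape sketch (illustrative): the nn.Embedding above maps each of the
# target_dim series indices to a single learned scalar (embed_dim = 1), so
# embedding all indices at once gives a (target_dim, 1) tensor of per-series
# features.
# ---------------------------------------------------------------------------
def _example_index_embedding(target_dim: int = 4) -> torch.Tensor:
    embed = nn.Embedding(num_embeddings=target_dim, embedding_dim=1)
    index = torch.arange(target_dim)   # tensor([0, 1, 2, 3])
    return embed(index)                # shape: (target_dim, 1)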
def __init__(
    self,
    input_size: int,
    decoder_size: int,
    num_layers: int,
    num_cells: int,
    cell_type: str,
    short_cycle: int,
    history_length: int,
    context_length: int,  # should be equal to the length of the long period
    prediction_length: int,  # should be an integer multiple of the short period
    distr_output: DistributionOutput,
    dropout_rate: float,
    cardinality: List[int],
    embedding_dimension: List[int],
    lags_seq: List[int],
    scaling: bool = True,
    dtype: np.dtype = np.float32,
) -> None:
    super().__init__()

    self.num_layers = num_layers
    self.num_cells = num_cells
    self.cell_type = cell_type
    self.short_cycle = short_cycle
    self.history_length = history_length
    self.context_length = context_length
    self.prediction_length = prediction_length
    self.dropout_rate = dropout_rate
    self.cardinality = cardinality
    self.embedding_dimension = embedding_dimension
    self.num_cat = len(cardinality)
    self.scaling = scaling
    self.dtype = dtype

    # self.lags_seq = lags_seq + [0]  # 0 is the current value
    self.lags_seq = [l - 1 for l in lags_seq]  # to distinguish from DeepAR

    self.distr_output = distr_output
    rnn = {"LSTM": nn.LSTM, "GRU": nn.GRU}[self.cell_type]

    self.long_cell = nn.GRUCell(input_size=input_size, hidden_size=num_cells)
    self.encoder = nn.ModuleList(
        rnn(
            input_size=num_cells,
            hidden_size=num_cells,
            num_layers=num_layers,
            dropout=dropout_rate,
            batch_first=True,
        )
        for _ in range(self.context_length // self.short_cycle)
    )
    # self.encoder = rnn(
    #     input_size=num_cells,
    #     hidden_size=num_cells,
    #     num_layers=num_layers,
    #     dropout=dropout_rate,
    #     batch_first=True,
    # )
    self.decoder = rnn(
        input_size=decoder_size,
        hidden_size=num_cells,
        num_layers=num_layers,
        dropout=dropout_rate,
        batch_first=True,
    )
    self.decoder_dnn = nn.ModuleList(
        nn.Sequential(
            nn.Linear(decoder_size, 1),
            # nn.ReLU(),
        )
        for _ in range(self.prediction_length)
    )
    self.out = nn.Linear(num_cells, 1)
    self.criterion = nn.MSELoss()  # l2, l1

    self.target_shape = distr_output.event_shape

    self.proj_distr_args = distr_output.get_args_proj(num_cells)

    self.embedder = FeatureEmbedder(
        cardinalities=cardinality, embedding_dims=embedding_dimension
    )

    if scaling:
        self.scaler = MeanScaler(keepdim=True)
    else:
        self.scaler = NOPScaler(keepdim=True)
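# ---------------------------------------------------------------------------
# Consistency sketch (illustrative): the encoder above is one RNN per short
# cycle inside the context window, so context_length is assumed to be a whole
# multiple of short_cycle; the signature comments likewise ask for
# prediction_length to be an integer multiple of the short period.
# ---------------------------------------------------------------------------
def _example_cycle_lengths(
    context_length: int = 168, prediction_length: int = 48, short_cycle: int = 24
) -> int:
    assert context_length % short_cycle == 0, "context must cover whole short cycles"
    assert prediction_length % short_cycle == 0, "horizon must cover whole short cycles"
    return context_length // short_cycle  # number of encoder RNNs built above (here 7)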
def __init__(
    self,
    input_size: int,
    d_model: int,
    num_heads: int,
    act_type: str,
    dropout_rate: float,
    dim_feedforward_scale: int,
    num_encoder_layers: int,
    num_decoder_layers: int,
    history_length: int,
    context_length: int,
    prediction_length: int,
    distr_output: DistributionOutput,
    cardinality: List[int],
    embedding_dimension: List[int],
    lags_seq: List[int],
    scaling: bool = True,
    **kwargs,
) -> None:
    super().__init__(**kwargs)

    self.history_length = history_length
    self.context_length = context_length
    self.prediction_length = prediction_length
    self.scaling = scaling
    self.cardinality = cardinality
    self.embedding_dimension = embedding_dimension
    self.distr_output = distr_output

    assert len(set(lags_seq)) == len(lags_seq), "no duplicated lags allowed!"
    lags_seq.sort()
    self.lags_seq = lags_seq

    self.target_shape = distr_output.event_shape

    # [B, T, input_size] -> [B, T, d_model]
    self.encoder_input = nn.Linear(input_size, d_model)
    self.decoder_input = nn.Linear(input_size, d_model)

    # [B, T, d_model] where d_model / num_heads is int
    self.transformer = nn.Transformer(
        d_model=d_model,
        nhead=num_heads,
        num_encoder_layers=num_encoder_layers,
        num_decoder_layers=num_decoder_layers,
        dim_feedforward=dim_feedforward_scale * d_model,
        dropout=dropout_rate,
        activation=act_type,
    )

    self.proj_dist_args = distr_output.get_args_proj(d_model)

    self.embedder = FeatureEmbedder(
        cardinalities=cardinality,
        embedding_dims=embedding_dimension,
    )

    if scaling:
        self.scaler = MeanScaler(keepdim=True)
    else:
        self.scaler = NOPScaler(keepdim=True)

    # mask
    self.register_buffer(
        "tgt_mask",
        self.transformer.generate_square_subsequent_mask(prediction_length),
    )
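# ---------------------------------------------------------------------------
# Construction sketch (illustrative, with assumed values). `network_cls` stands
# in for whatever class owns the __init__ above. The visible constraint is that
# d_model must be divisible by num_heads (nn.Transformer splits d_model across
# attention heads); the feed-forward width is dim_feedforward_scale * d_model.
# ---------------------------------------------------------------------------
def _example_build_transformer(network_cls, distr_output: DistributionOutput):
    return network_cls(
        input_size=19,                 # assumed per-step feature size
        d_model=32,
        num_heads=8,                   # 32 / 8 = 4 dimensions per head
        act_type="gelu",               # nn.Transformer also accepts "relu"
        dropout_rate=0.1,
        dim_feedforward_scale=4,       # feed-forward width 4 * 32 = 128
        num_encoder_layers=3,
        num_decoder_layers=3,
        history_length=24 + 168,
        context_length=24,
        prediction_length=24,
        distr_output=distr_output,     # any DistributionOutput instance
        cardinality=[5],
        embedding_dimension=[3],
        lags_seq=[1, 24, 168],
        scaling=True,
    )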