def fit_mdn(X, y, ncomponents=5, nepochs=50, val_pct=0.1, batch_size=None,
            target_batch_pct=0.01, min_batch_size=10, max_batch_size=100,
            verbose=False, lr=3e-4, weight_decay=0.01):
    # Create a temporary file to store the best model
    import uuid
    tmp_file = '/tmp/tmp_file_' + str(uuid.uuid4())

    # Choose a suitable batch size
    if batch_size is None:
        batch_size = max(min_batch_size,
                         min(max_batch_size,
                             int(np.round(X.shape[0] * target_batch_pct))))

    # Standardize the features (helps with gradient propagation)
    Xstd = X.std(axis=0)
    Xstd[Xstd == 0] = 1  # Handle constant features
    tX = autograd.Variable(torch.FloatTensor((X - X.mean(axis=0, keepdims=True)) / Xstd[np.newaxis, :]),
                           requires_grad=False)
    tY = autograd.Variable(torch.FloatTensor(y), requires_grad=False)

    # Create train/validate splits
    indices = np.arange(X.shape[0], dtype=int)
    np.random.shuffle(indices)
    train_cutoff = int(np.round(len(indices) * (1 - val_pct)))
    train_indices = indices[:train_cutoff]
    validate_indices = indices[train_cutoff:]

    model = MixtureDensityNetwork(X.shape[1], ncomponents, X.mean(axis=0), Xstd, y.mean(), y.std())

    # Setup the SGD method
    optimizer = optim.RMSprop(model.parameters(), lr=lr, weight_decay=weight_decay)

    # Track progress
    train_losses, val_losses, best_loss = np.zeros(nepochs), np.zeros(nepochs), None

    # Train the model
    for epoch in range(nepochs):
        if verbose:
            print('\t\tEpoch {}'.format(epoch + 1))
            sys.stdout.flush()

        # Track the loss curves
        train_loss = torch.Tensor([0])
        for batch_idx, batch in enumerate(batches(train_indices, batch_size, shuffle=True)):
            if verbose and (batch_idx % 100 == 0):
                print('\t\t\tBatch {}'.format(batch_idx))
            tidx = autograd.Variable(torch.LongTensor(batch), requires_grad=False)

            # Set the model to training mode
            model.train()

            # Reset the gradient
            model.zero_grad()

            # Run the model and get the predictions
            pi, mu, sigma = model(tX[tidx])

            # Calculate the log-probabilities
            components = torch.distributions.Normal(mu, sigma)
            logprobs = components.log_prob(tY[tidx][:, None])

            # -log(GMM(y | x)) loss
            loss = -logsumexp(pi.log() + logprobs, dim=1).mean()

            # Calculate gradients
            loss.backward()

            # Apply the update
            optimizer.step()

            # Track the loss
            train_loss += loss.data

        validate_loss = torch.Tensor([0])
        for batch_idx, batch in enumerate(batches(validate_indices, batch_size, shuffle=False)):
            if verbose and (batch_idx % 100 == 0):
                print('\t\t\tValidation Batch {}'.format(batch_idx))
            tidx = autograd.Variable(torch.LongTensor(batch), requires_grad=False)

            # Set the model to test mode
            model.eval()

            # Reset the gradient
            model.zero_grad()

            # Run the model and get the predictions
            pi, mu, sigma = model(tX[tidx])

            # Calculate the log-probabilities
            components = torch.distributions.Normal(mu, sigma)
            logprobs = components.log_prob(tY[tidx][:, None])

            # -log(GMM(y | x)) loss
            loss = -logsumexp(pi.log() + logprobs, dim=1).sum()

            # Track the loss
            validate_loss += loss.data

        train_losses[epoch] = train_loss.numpy() / float(len(train_indices))
        val_losses[epoch] = validate_loss.numpy() / float(len(validate_indices))

        # Check if we currently have the best held-out log-likelihood
        if epoch == 0 or val_losses[epoch] <= best_loss:
            if verbose:
                print('\t\t\tSaving test set results. <----- New high water mark on epoch {}'.format(epoch + 1))
            # If so, save the current model as the best one so far
            best_loss = val_losses[epoch]
            torch.save(model, tmp_file)

        if verbose:
            print('Validation loss: {} Best: {}'.format(val_losses[epoch], best_loss))

    # Reload the best model seen during training and clean up
    model = torch.load(tmp_file)
    os.remove(tmp_file)
    return model
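# Minimal usage sketch for fit_mdn (illustration only, not part of the original
# module). The synthetic data and the helper function name are hypothetical.
# Inputs are standardized the same way as inside the training loop above before
# calling the fitted network, since that is the convention fit_mdn itself uses.
def _example_fit_mdn():
    rng = np.random.RandomState(0)
    X = rng.normal(size=(500, 3))
    # Bimodal response: a mixture density network can capture both modes
    y = X[:, 0] + rng.choice([-2.0, 2.0], size=500) + rng.normal(scale=0.25, size=500)
    model = fit_mdn(X, y, ncomponents=2, nepochs=5)
    # Standardize the features exactly as the training loop does
    Xstd = X.std(axis=0)
    Xstd[Xstd == 0] = 1
    tX = autograd.Variable(torch.FloatTensor((X - X.mean(axis=0, keepdims=True)) / Xstd[np.newaxis, :]),
                           requires_grad=False)
    model.eval()
    # Per-row mixture weights, means, and scales of the conditional density
    pi, mu, sigma = model(tX)
    return pi, mu, sigma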
def fit_classifier(X, y, classes=None, nepochs=40, val_pct=0.1, batch_size=None,
                   target_batch_pct=0.01, min_batch_size=10, max_batch_size=100,
                   verbose=False, lr=3e-4, weight_decay=5e-5):
    if classes is None:
        classes = np.unique(y)
    nclasses = classes.shape[0]

    # Create a temporary file to store the best model
    import uuid
    tmp_file = "/tmp/tmp_file_" + str(uuid.uuid4())

    # Choose a suitable batch size
    if batch_size is None:
        batch_size = max(min_batch_size,
                         min(max_batch_size,
                             int(np.round(X.shape[0] * target_batch_pct))))

    # Standardize the features (helps with gradient propagation)
    Xstd = X.std(axis=0)
    Xstd[Xstd == 0] = 1  # Handle constant features
    tX = autograd.Variable(torch.FloatTensor((X - X.mean(axis=0, keepdims=True)) / Xstd[np.newaxis, :]),
                           requires_grad=False)

    # Encode the classes by their indices
    tY = np.zeros(y.shape[0], dtype=int)
    for i, c in enumerate(classes):
        tY[y == c] = i
    tY = autograd.Variable(torch.LongTensor(tY), requires_grad=False)

    # Training weights to balance the dataset
    y_counts = np.array([(y == c).sum() for c in classes])
    tY_weights = autograd.Variable(torch.FloatTensor(len(y_counts) * y_counts / float(len(y))),
                                   requires_grad=False)
    crossent = nn.CrossEntropyLoss(weight=tY_weights)

    # Create train/validate splits
    indices = np.arange(X.shape[0], dtype=int)
    np.random.shuffle(indices)
    train_cutoff = int(np.round(len(indices) * (1 - val_pct)))
    train_indices = indices[:train_cutoff]
    validate_indices = indices[train_cutoff:]

    model = DiscreteClassifierNetwork(X.shape[1], classes, X.mean(axis=0), Xstd)

    # Setup the SGD method
    optimizer = optim.RMSprop(model.parameters(), lr=lr, weight_decay=weight_decay)

    # Track progress
    train_losses, val_losses, best_loss = np.zeros(nepochs), np.zeros(nepochs), None

    # Train the model
    for epoch in range(nepochs):
        if verbose:
            print("\t\tEpoch {}".format(epoch + 1))
            sys.stdout.flush()

        # Track the loss curves
        train_loss = torch.Tensor([0])
        for batch_idx, batch in enumerate(batches(train_indices, batch_size, shuffle=True)):
            if verbose and (batch_idx % 100 == 0):
                print("\t\t\tBatch {}".format(batch_idx))
            tidx = autograd.Variable(torch.LongTensor(batch), requires_grad=False)

            # Set the model to training mode
            model.train()

            # Reset the gradient
            model.zero_grad()

            # Run the model and get the predictions
            logits = model(tX[tidx])

            # Cross-entropy loss
            loss = crossent(logits, tY[tidx])

            # Calculate gradients
            loss.backward()

            # Apply the update
            optimizer.step()

            # Track the loss
            train_loss += loss.data

        validate_loss = torch.Tensor([0])
        for batch_idx, batch in enumerate(batches(validate_indices, batch_size, shuffle=False)):
            if verbose and (batch_idx % 100 == 0):
                print("\t\t\tValidation Batch {}".format(batch_idx))
            tidx = autograd.Variable(torch.LongTensor(batch), requires_grad=False)

            # Set the model to test mode
            model.eval()

            # Reset the gradient
            model.zero_grad()

            # Run the model and get the predictions
            logits = model(tX[tidx])

            # Cross-entropy loss
            loss = crossent(logits, tY[tidx])

            # Track the loss
            validate_loss += loss.data

        train_losses[epoch] = train_loss.numpy() / float(len(train_indices))
        val_losses[epoch] = validate_loss.numpy() / float(len(validate_indices))

        # Check if we currently have the best held-out log-likelihood
        if epoch == 0 or val_losses[epoch] <= best_loss:
            if verbose:
                print("\t\t\tSaving test set results. <----- New high water mark on epoch {}".format(epoch + 1))
            # If so, save the current model as the best one so far
            best_loss = val_losses[epoch]
            torch.save(model, tmp_file)

        if verbose:
            print("Validation loss: {} Best: {}".format(val_losses[epoch], best_loss))

    # Reload the best model seen during training and clean up
    model = torch.load(tmp_file)
    os.remove(tmp_file)
    return model
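# Minimal usage sketch for fit_classifier (illustration only, not part of the
# original module). The synthetic labels and the helper function name are
# hypothetical. The fitted model returns one logit per class, ordered as in
# np.unique(y), so an argmax over the logits gives predicted class indices.
def _example_fit_classifier():
    rng = np.random.RandomState(0)
    X = rng.normal(size=(500, 4))
    y = (X[:, 0] + 0.5 * X[:, 1] > 0).astype(int)
    model = fit_classifier(X, y, nepochs=5)
    # Standardize the features exactly as the training loop does
    Xstd = X.std(axis=0)
    Xstd[Xstd == 0] = 1
    tX = autograd.Variable(torch.FloatTensor((X - X.mean(axis=0, keepdims=True)) / Xstd[np.newaxis, :]),
                           requires_grad=False)
    model.eval()
    logits = model(tX)
    # Predicted class indices into np.unique(y)
    predicted = logits.data.numpy().argmax(axis=1)
    return predicted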
def fit_nn(X, y, nepochs=100, batch_size=10, val_pct=0.1, verbose=False,
           lr=3e-4, weight_decay=0.01, model_type='nonlinear'):
    # Create a temporary file to store the best model
    import uuid
    tmp_file = '/tmp/' + str(uuid.uuid4())

    # Standardize the features (helps with gradient propagation)
    Xstd = X.std(axis=0)
    Xstd[Xstd == 0] = 1  # Handle constant features
    tX = autograd.Variable(torch.FloatTensor((X - X.mean(axis=0, keepdims=True)) / Xstd[np.newaxis, :]),
                           requires_grad=False)
    tY = autograd.Variable(torch.FloatTensor((y - y.mean()) / y.std()), requires_grad=False)

    # Create train/validate splits
    indices = np.arange(X.shape[0], dtype=int)
    np.random.shuffle(indices)
    train_cutoff = int(np.round(len(indices) * (1 - val_pct)))
    train_indices = indices[:train_cutoff]
    validate_indices = indices[train_cutoff:]

    model = NeuralModel(X.shape[1], X.mean(axis=0), Xstd, y.mean(), y.std(), model_type=model_type)

    # Setup the SGD method
    optimizer = optim.RMSprop(model.parameters(), lr=lr, weight_decay=weight_decay)

    # Track progress
    train_losses, val_losses, best_loss = np.zeros(nepochs), np.zeros(nepochs), None

    # Train the model
    for epoch in range(nepochs):
        if verbose:
            print('\t\tEpoch {}'.format(epoch + 1))
            sys.stdout.flush()

        # Track the loss curves
        train_loss = torch.Tensor([0])
        for batch_idx, batch in enumerate(batches(train_indices, batch_size, shuffle=True)):
            if verbose and (batch_idx % 100 == 0):
                print('\t\t\tBatch {}'.format(batch_idx))
            tidx = autograd.Variable(torch.LongTensor(batch), requires_grad=False)

            # Set the model to training mode
            model.train()

            # Reset the gradient
            model.zero_grad()

            # Run the model and get the predictions
            tYhat = model(tX[tidx])[:, 0]

            # MSE loss
            loss = ((tY[tidx] - tYhat)**2).sum()

            # Calculate gradients
            loss.backward()

            # Apply the update
            optimizer.step()

            # Track the loss
            train_loss += loss.data

        validate_loss = torch.Tensor([0])
        for batch_idx, batch in enumerate(batches(validate_indices, batch_size, shuffle=False)):
            if verbose and (batch_idx % 100 == 0):
                print('\t\t\tValidation Batch {}'.format(batch_idx))
            tidx = autograd.Variable(torch.LongTensor(batch), requires_grad=False)

            # Set the model to test mode
            model.eval()

            # Reset the gradient
            model.zero_grad()

            # Run the model and get the predictions
            tYhat = model(tX[tidx])[:, 0]

            # MSE loss
            loss = ((tY[tidx] - tYhat)**2).sum()

            # Track the loss
            validate_loss += loss.data

        train_losses[epoch] = train_loss.numpy() / float(len(train_indices))
        val_losses[epoch] = validate_loss.numpy() / float(len(validate_indices))

        # Check if we currently have the best held-out loss
        if epoch == 0 or val_losses[epoch] <= best_loss:
            if verbose:
                print('\t\t\tSaving test set results. <----- New high water mark on epoch {}'.format(epoch + 1))
            # If so, save the current model as the best one so far
            best_loss = val_losses[epoch]
            torch.save(model, tmp_file)

        if verbose:
            print('Validation loss: {} Best: {}'.format(val_losses[epoch], best_loss))

    # Reload the best model seen during training and clean up
    model = torch.load(tmp_file)
    os.remove(tmp_file)
    return model
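# Minimal usage sketch for fit_nn (illustration only, not part of the original
# module). The synthetic data and the helper function name are hypothetical.
# fit_nn trains against a standardized response, so the raw network output
# below appears to be on the standardized scale of y, as in the training loop
# above; it is rescaled here with y.mean() and y.std() for original units.
def _example_fit_nn():
    rng = np.random.RandomState(0)
    X = rng.normal(size=(500, 3))
    y = 2.0 * X[:, 0] - X[:, 1] + rng.normal(scale=0.1, size=500)
    model = fit_nn(X, y, nepochs=5, model_type='nonlinear')
    # Standardize the features exactly as the training loop does
    Xstd = X.std(axis=0)
    Xstd[Xstd == 0] = 1
    tX = autograd.Variable(torch.FloatTensor((X - X.mean(axis=0, keepdims=True)) / Xstd[np.newaxis, :]),
                           requires_grad=False)
    model.eval()
    yhat_standardized = model(tX)[:, 0]
    # Map the standardized prediction back to the original scale of y
    return yhat_standardized * float(y.std()) + float(y.mean())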