def predict(arg):
    img_meta, img = arg
    h, w = img.shape[:2]
    s = patch_size
    # step = s // 2 - 32
    step = s - 64
    xs = list(range(0, w - s, step)) + [w - s]
    ys = list(range(0, h - s, step)) + [h - s]
    all_xy = [(x, y) for x in xs for y in ys]
    pred_img = np.zeros((utils.N_CLASSES + 1, h, w), dtype=np.float32)
    pred_count = np.zeros((h, w), dtype=np.int32)

    def make_batch(xy_batch_):
        return (xy_batch_, torch.stack([
            utils.img_transform(img[y: y + s, x: x + s])
            for x, y in xy_batch_]))

    for xy_batch, inputs in utils.imap_fixed_output_buffer(
            make_batch, tqdm.tqdm(list(utils.batches(all_xy, batch_size))),
            threads=1):
        outputs = model(utils.variable(inputs, volatile=True))
        outputs_data = np.exp(outputs.data.cpu().numpy())
        for (x, y), pred in zip(xy_batch, outputs_data):
            pred_img[:, y: y + s, x: x + s] += pred
            pred_count[y: y + s, x: x + s] += 1
    pred_img /= np.maximum(pred_count, 1)
    return img_meta, pred_img
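Several of the snippets here (including the one above) call a `utils.batches` helper that chunks a sequence into fixed-size pieces, but none of them show its definition. Below is a minimal sketch of such a chunker, written as an assumption about what these calls expect; the actual helpers in each project may differ (some take `size=` as a keyword, others accept a `shuffle` flag).

# Hypothetical chunking helper matching the positional calls above; not any project's actual code.
import random

def batches(seq, size, shuffle=False):
    """Yield consecutive chunks of `seq` with at most `size` items each."""
    items = list(seq)
    if shuffle:
        random.shuffle(items)
    for start in range(0, len(items), size):
        yield items[start:start + size]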
def run(self, no_threads=1):
    logging.info('parsing xml...')
    self.parse_dict()
    entries_per_thread = (len(self.raw_dict) / no_threads) + 1
    self.thread_states = {}
    # may turn out to be less than "no_threads" with small input
    started_threads = 0
    for i, batch in enumerate(
            batches(self.raw_dict.keys(), entries_per_thread)):
        t = threading.Thread(target=self.process_entries_thread,
                             args=(i, batch))
        t.start()
        started_threads += 1
    logging.info("started {0} threads".format(started_threads))
    while True:
        if len(self.thread_states) < started_threads:
            time.sleep(1)
            continue
        elif all(self.thread_states.values()):
            logging.info(
                "{0} threads finished successfully".format(no_threads))
            break
        else:
            raise Exception("some threads failed")
def train_step(sess, model, train):
    if len(train) % parameters.batch_size == 0:
        batch_num = int(len(train) / parameters.batch_size)
    else:
        batch_num = int(len(train) / parameters.batch_size) + 1
    step = 0
    loss = .0
    batches = utils.batches(train, parameters.batch_size)
    for batch in batches:
        input_x, hamds, intervals, drugs, targets = batch
        _loss, _ = sess.run(
            [model.loss, model.optimizer],
            feed_dict={
                model.input_x: input_x,
                model.input_hamd: hamds,
                model.input_interval: intervals,
                model.input_drug: drugs,
                model.input_target: targets,
                model.dropout_rate: parameters.dropout_rate
            })
        loss += _loss
        step += 1
        sys.stdout.write("\033[F")
        sys.stdout.write("\033[K")
        print("Process Training Batch: [{}/{}]".format(step, batch_num))
    return loss
def task_1(sess, model, test):
    step = 0
    if len(test) % parameters.batch_size == 0:
        batch_num = int(len(test) / parameters.batch_size)
    else:
        batch_num = int(len(test) / parameters.batch_size) + 1
    targets = []
    inferences = []
    batches = utils.batches(test, parameters.batch_size)
    for batch in batches:
        input_x, hamds, intervals, drugs, _targets = batch
        inference = sess.run(
            model.inference,
            feed_dict={
                model.input_x: input_x,
                model.input_hamd: hamds,
                model.input_interval: intervals,
                model.input_drug: drugs,
                model.dropout_rate: 1.0
            })
        targets.append(_targets)
        inferences.append(inference)
    targets = np.concatenate(targets, 0)
    inferences = np.concatenate(inferences, 0)
    rms = math.sqrt(sum((targets - inferences)**2) / len(targets))
    return rms
def validation_loss():
    with torch.no_grad():
        val_losses = []
        val_h = net.blank_hidden(batch_size)
        for x, y in batches(X_val, Y_val, batch_size, seq_size):
            out_val, val_h = net(x, val_h)
            val_loss = F.cross_entropy(out_val.transpose(1, 2), y)
            val_losses.append(val_loss)
        val_losses = torch.stack(val_losses)
    return val_losses
def get_track_features(tracks, throttle=False):
    all_tracks = []
    for track_batch in utils.batches(tracks, 50):
        track_ids = [track["id"] for track in track_batch]
        print(track_ids[0])  # just print something to know stuff is happening
        track_features = sp.audio_features(track_ids)
        all_tracks += track_features
        if throttle:
            sleep(1.0)
    return all_tracks
def train(epochs=20):
    '''
    Train the RNN and save the model

    Inputs
    ------
    epochs: the number of rounds we train on the whole dataset
    '''
    iters = 0
    for epoch in range(epochs):
        batch_num = 0
        losses = []
        h = net.blank_hidden(batch_size)
        for x, y in batches(X, Y, batch_size, seq_size):
            # use network predictions to compute loss
            h = tuple([state.detach() for state in h])
            out, h = net(x, h)
            loss = F.cross_entropy(out.transpose(1, 2), y)

            # optimization step
            opt.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(net.parameters(), grad_norm)
            opt.step()

            # print training progress
            progress(batch_num, num_batches, iters, epochs * num_batches, epoch + 1)

            # bookkeeping
            losses.append(loss)
            batch_num += 1
            iters += 1

        # plot loss after every epoch
        plot(epoch + 1, torch.stack(losses), 'Loss', 'Training', '#5DE58D', refresh=False)
        plot(epoch + 1, validation_loss(), 'Loss', 'Validation', '#4AD2FF')

        # save the model occasionally
        if epoch % save_iter == save_iter - 1:
            save_model(net, filename, epoch + 1)

    # save model at the end of training
    save_model(net, filename, 'final')
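The two character-RNN loops here iterate over `batches(X, Y, batch_size, seq_size)` without showing that generator. Since the training loop carries the hidden state `h` across batches, a plausible minimal version keeps each batch row contiguous in time. This is a sketch under the assumption that `X` and `Y` are aligned 1-D arrays of encoded characters; the project's real implementation may handle remainders or shuffling differently.

# Hypothetical sequence batcher for the RNN loops above (an assumption, not the original code).
import torch

def batches(X, Y, batch_size, seq_size):
    """Yield (x, y) LongTensor pairs of shape (batch_size, seq_size),
    keeping each row contiguous across successive batches so the
    hidden state can be carried over."""
    n_steps = len(X) // batch_size  # time steps per stream
    x_all = torch.as_tensor(X[:batch_size * n_steps], dtype=torch.long).view(batch_size, n_steps)
    y_all = torch.as_tensor(Y[:batch_size * n_steps], dtype=torch.long).view(batch_size, n_steps)
    for t in range(0, n_steps - seq_size + 1, seq_size):
        yield x_all[:, t:t + seq_size], y_all[:, t:t + seq_size]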
def batch_lookup(ids, country="us"):
    """
    Look up many iTunes tracks by IDs.
    """
    session = requests.Session()
    for ids_batch in batches(ids, size=150):
        results = {}
        for result in lookup(ids_batch, country=country, session=session):
            type = result["wrapperType"]
            id = result.get(type + "Id") or result["trackId"]
            results[id] = result
        for id in ids_batch:
            yield (id, results.get(id))
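One possible way to consume the generator above, assuming `track_ids` is an iterable of iTunes track IDs: collect the (id, result) pairs into a dict, skipping IDs the lookup did not return.

# Example usage sketch; `track_ids` is an assumed input list.
results_by_id = {track_id: result
                 for track_id, result in batch_lookup(track_ids)
                 if result is not None}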
def collect_artists(genre, num_artists=SEARCH_LIMIT):
    sp = utils.get_spotipy_instance()
    artists = []
    for batch in utils.batches(range(num_artists), SEARCH_LIMIT):
        search_results = sp.search(
            q=f"genre:{genre}",
            type="artist",
            offset=batch[0],
            limit=len(batch)
        )
        artists += search_results["artists"]["items"]
    return sorted(
        artists, reverse=True, key=lambda artist: artist["popularity"]
    )
def train(epochs=20):
    reals, total, js, iters = 0, 0, 0, 0
    for epoch in range(epochs):
        batch_num = 0
        losses = []
        h = net.blank_hidden(batch_size)
        for x, y in batches(X, Y, batch_size, seq_size):
            # use network predictions to compute loss
            h = tuple([state.detach() for state in h])
            out, h = net(x, h)
            loss = F.cross_entropy(out.transpose(1, 2), y)

            # optimization step
            opt.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(net.parameters(), grad_norm)
            opt.step()

            # print training progress
            progress(batch_num, num_batches, iters, epochs * num_batches, epoch)

            # bookkeeping
            losses.append(loss)
            batch_num += 1
            iters += 1

        # plot loss after every epoch
        plot(epoch, torch.stack(losses), 'Loss', 'Training', '#5DE58D', refresh=False)
        plot(epoch, validation_loss(), 'Loss', 'Validation', '#4AD2FF')
        plot(epoch, math.log(total + 1), 'Words', '\'words\'', '#52d737')
        plot(epoch, math.log(reals + 1), 'Words', 'real words', '#d437d7')
        plot(epoch, js, 'Zionism', 'instances', '#c12b2b')

        print(colored('\n\n' + smpl + '\n', 'cyan'))  # print sample
def run(self, no_threads=1):
    logging.info('parsing xml...')
    self.parse_dict()
    # print "\n".join(["\n".join(["{0}\t{1}".format(
    #     w, d['definition']) for d in s['senses']])
    #     for w, s in self.raw_dict.items()])
    # print self.raw_dict
    # sys.exit(-1)
    entries_per_thread = (len(self.raw_dict) / no_threads) + 1
    self.thread_states = {}
    # may turn out to be less than "no_threads" with small input
    started_threads = 0
    if ONE_BY_ONE:
        logging.warning('running threads one by one!')
    for i, batch in enumerate(
            batches(self.raw_dict.keys(), entries_per_thread)):
        if ONE_BY_ONE:
            logging.warning('running batch #{0}'.format(i))
            self.process_entries_thread(i, batch)
        else:
            t = threading.Thread(target=self.process_entries_thread,
                                 args=(i, batch))
            t.start()
            started_threads += 1
    logging.info("started {0} threads".format(started_threads))
    while True:
        if len(self.thread_states) < started_threads:
            time.sleep(1)
            continue
        elif all(self.thread_states.values()):
            logging.info(
                "{0} threads finished successfully".format(no_threads))
            break
        else:
            raise Exception("some threads failed")
def _predict(arg, model, patch_size, batch_size, blobs_by_img_id,
             blob_scale_by_img_id):
    (path, scale), img = arg
    img_id = int(path.stem)
    h, w = img.shape[:2]
    s = patch_size // 2
    cls_blobs = blobs_by_img_id.get(img_id)
    if not cls_blobs or not any(cls_blobs):
        return (path, scale), None
    blob_scale = blob_scale_by_img_id[img_id]
    all_xy = [(cls, i,
               int(round(x * blob_scale * scale)),
               int(round(y * blob_scale * scale)))
              for cls, blobs in enumerate(cls_blobs)
              for i, (x, y, _) in enumerate(blobs)]

    def make_batch(xy_batch_):
        indices, patches = [], []
        for cls, i, x, y in xy_batch_:
            patch = img[max(0, y - s): y + s, max(0, x - s): x + s]
            if patch.shape[:2] == (patch_size, patch_size):
                patches.append(utils.img_transform(patch))
                indices.append((cls, i))
        patches = torch.stack(patches) if patches else None
        return indices, patches

    all_indices, all_outputs = [], []
    for indices, inputs in utils.imap_fixed_output_buffer(
            make_batch, tqdm.tqdm(list(utils.batches(all_xy, batch_size))),
            threads=1):
        if inputs is not None:
            outputs = model(utils.variable(inputs, volatile=True))
            outputs = F.softmax(outputs).data.cpu().numpy()
            all_indices.extend(indices)
            all_outputs.extend(outputs)
    return img_id, (all_indices, all_outputs)
                                             num_outputs=1200,
                                             activation_fn=tf.nn.tanh)
layer_2 = tf.contrib.layers.fully_connected(inputs=layer_1,
                                             num_outputs=600,
                                             activation_fn=tf.nn.tanh)
_out = tf.contrib.layers.fully_connected(inputs=layer_2,
                                          num_outputs=_y.shape[1],
                                          activation_fn=tf.nn.tanh)
cost = tf.reduce_mean(tf.pow(_out - y, 2))
optimizer = tf.train.AdamOptimizer().minimize(cost)

init_op = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init_op)
    for i in range(100):
        for batch_x, batch_y in utils.batches(_x, _y, 1000):
            _, c = sess.run([optimizer, cost],
                            feed_dict={x: batch_x, y: batch_y})
        print(c)
    c = sess.run(cost, feed_dict={x: _x_t, y: _y_t})
    print('test: ', c)
def fit_quantiles(X, y, quantiles=0.5, lossfn='marginal', nepochs=100,
                  val_pct=0.1, batch_size=None, target_batch_pct=0.01,
                  min_batch_size=20, max_batch_size=100, verbose=False,
                  lr=1e-1, weight_decay=0.0, patience=5, init_model=None,
                  splits=None, file_checkpoints=True, clip_gradients=False,
                  **kwargs):
    if file_checkpoints:
        import uuid
        tmp_file = '/tmp/tmp_file_' + str(uuid.uuid4())

    if batch_size is None:
        batch_size = min(
            X.shape[0],
            max(min_batch_size,
                min(max_batch_size,
                    int(np.round(X.shape[0] * target_batch_pct)))))
        if verbose:
            print('Auto batch size chosen to be {}'.format(batch_size))

    # Standardize the features and response (helps with gradient propagation)
    Xmean = X.mean(axis=0, keepdims=True)
    Xstd = X.std(axis=0, keepdims=True)
    Xstd[Xstd == 0] = 1  # Handle constant features
    ymean, ystd = y.mean(axis=0, keepdims=True), y.std(axis=0, keepdims=True)
    tX = autograd.Variable(torch.FloatTensor((X - Xmean) / Xstd), requires_grad=False)
    tY = autograd.Variable(torch.FloatTensor((y - ymean) / ystd), requires_grad=False)

    # Create train/validate splits
    if splits is None:
        indices = np.arange(X.shape[0], dtype=int)
        np.random.shuffle(indices)
        train_cutoff = int(np.round(len(indices) * (1 - val_pct)))
        train_indices = indices[:train_cutoff]
        validate_indices = indices[train_cutoff:]
    else:
        train_indices, validate_indices = splits

    if np.isscalar(quantiles):
        quantiles = np.array([quantiles])
    if lossfn == 'geometric':
        quantiles = 2 * quantiles - 1
    tquantiles = autograd.Variable(torch.FloatTensor(quantiles), requires_grad=False)

    # Initialize the model
    model = QuantileNetworkModule(
        Xmean, Xstd, ymean, ystd,
        quantiles.shape[0]) if init_model is None else init_model

    # Save the model to file
    if file_checkpoints:
        torch.save(model, tmp_file)
    else:
        import pickle
        model_str = pickle.dumps(model)

    # Setup the SGD method
    optimizer = optim.SGD(model.parameters(), lr=lr, weight_decay=weight_decay,
                          nesterov=True, momentum=0.9)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.5)

    # Track progress
    train_losses, val_losses, best_loss = np.zeros(nepochs), np.zeros(nepochs), None
    num_bad_epochs = 0

    if verbose:
        print('ymax and min:', tY.max(), tY.min())

    # Univariate quantile loss
    def quantile_loss(yhat, tidx):
        z = tY[tidx, None] - yhat
        return torch.max(tquantiles[None] * z, (tquantiles[None] - 1) * z)

    # Marginal quantile loss for multivariate response
    def marginal_loss(yhat, tidx):
        z = tY[tidx, :, None] - yhat
        return torch.max(tquantiles[None, None] * z, (tquantiles[None, None] - 1) * z)

    # Geometric quantile loss -- uses a Euclidean unit ball definition of multivariate quantiles
    def geometric_loss(yhat, tidx):
        z = tY[tidx, :, None] - yhat
        return torch.norm(z, dim=1) + (z * tquantiles[None, None]).sum(dim=1)

    # Create the quantile loss function
    if len(tY.shape) == 1 or tY.shape[1] == 1:
        lossfn = quantile_loss
    elif lossfn == 'marginal':
        print('Using marginal loss')
        lossfn = marginal_loss
    elif lossfn == 'geometric':
        print('Using geometric loss')
        lossfn = geometric_loss

    # Train the model
    for epoch in range(nepochs):
        if verbose:
            print('\t\tEpoch {}'.format(epoch + 1))
            sys.stdout.flush()

        # Track the loss curves
        train_loss = torch.Tensor([0])
        for batch_idx, batch in enumerate(
                batches(train_indices, batch_size, shuffle=True)):
            if verbose and (batch_idx % 100 == 0):
                print('\t\t\tBatch {}'.format(batch_idx))
            tidx = autograd.Variable(torch.LongTensor(batch), requires_grad=False)

            # Set the model to training mode
            model.train()

            # Reset the gradient
            model.zero_grad()

            # Run the model and get the predicted quantiles
            yhat = model(tX[tidx])

            # Loss for all quantiles
            loss = lossfn(yhat, tidx).mean()

            # Calculate gradients
            loss.backward()

            # Clip the gradients
            if clip_gradients:
                clip_gradient(model)

            # Apply the update
            # [p for p in model.parameters() if p.requires_grad]
            optimizer.step()

            # Track the loss
            train_loss += loss.data

            if np.isnan(loss.data.numpy()):
                import warnings
                warnings.warn('NaNs encountered in training model.')
                break

        validate_loss = torch.Tensor([0])
        for batch_idx, batch in enumerate(
                batches(validate_indices, batch_size, shuffle=False)):
            if verbose and (batch_idx % 100 == 0):
                print('\t\t\tValidation Batch {}'.format(batch_idx))
            tidx = autograd.Variable(torch.LongTensor(batch), requires_grad=False)

            # Set the model to test mode
            model.eval()

            # Reset the gradient
            model.zero_grad()

            # Run the model and get the conditional mixture weights
            yhat = model(tX[tidx])

            # Track the loss
            validate_loss += lossfn(yhat, tidx).sum()

        train_losses[epoch] = train_loss.data.numpy() / float(len(train_indices))
        val_losses[epoch] = validate_loss.data.numpy() / float(len(validate_indices))

        # Adjust the learning rate down if the validation performance is bad
        if num_bad_epochs > patience:
            if verbose:
                print('Decreasing learning rate to {}'.format(lr * 0.5))
            scheduler.step(val_losses[epoch])
            lr *= 0.5
            num_bad_epochs = 0

        # If the model blew up and gave us NaNs, adjust the learning rate down and restart
        if np.isnan(val_losses[epoch]):
            if verbose:
                print('Network went to NaN. Readjusting learning rate down by 50%')
            if file_checkpoints:
                os.remove(tmp_file)
            return fit_quantiles(X, y, quantiles=quantiles, lossfn=lossfn,
                                 nepochs=nepochs, val_pct=val_pct,
                                 batch_size=batch_size,
                                 target_batch_pct=target_batch_pct,
                                 min_batch_size=min_batch_size,
                                 max_batch_size=max_batch_size,
                                 verbose=verbose, lr=lr * 0.5,
                                 weight_decay=weight_decay, patience=patience,
                                 init_model=init_model, splits=splits,
                                 file_checkpoints=file_checkpoints, **kwargs)

        # Check if we currently have the best held-out log-likelihood
        if epoch == 0 or val_losses[epoch] <= best_loss:
            if verbose:
                print('\t\t\tSaving test set results. <----- New high water mark on epoch {}'.format(epoch + 1))
            # If so, use the current model on the test set
            best_loss = val_losses[epoch]
            if file_checkpoints:
                torch.save(model, tmp_file)
            else:
                import pickle
                model_str = pickle.dumps(model)
        else:
            num_bad_epochs += 1

        if verbose:
            print('Validation loss: {} Best: {}'.format(val_losses[epoch], best_loss))

    # Load the best model and clean up the checkpoints
    if file_checkpoints:
        model = torch.load(tmp_file)
        os.remove(tmp_file)
    else:
        import pickle
        model = pickle.loads(model_str)

    # Return the conditional density model that marginalizes out the grid
    return model
def train(self, model_fn=None, lasso=0., l2=1e-4, lr=3e-4, num_epochs=250,
          batch_size=None, num_folds=3, val_pct=0.1, verbose=False,
          folds=None, weight_decay=0.01, random_restarts=1, save_dir='/tmp/',
          momentum=0.9, patience=3, clip_gradients=None):
    # Make sure we have a model of the prior
    if model_fn is None:
        model_fn = lambda nfeatures: DeepAdaptiveFDRModeler(nfeatures)

    # Lasso penalty (if any)
    lasso = autograd.Variable(torch.FloatTensor([lasso]), requires_grad=False)
    l2 = autograd.Variable(torch.FloatTensor([l2]), requires_grad=False)

    if batch_size is None:
        batch_size = int(max(10, min(100, np.round(self.X.shape[0] / 100.))))
        print('Batch size: {}'.format(batch_size))

    # Discrete approximation of a beta PDF support
    tbeta_grid = autograd.Variable(torch.FloatTensor(self.beta_grid), requires_grad=False)
    sys.stdout.flush()

    # Split the data into a bunch of cross-validation folds
    if folds is None:
        if verbose:
            print('\tCreating {} folds'.format(num_folds))
            sys.stdout.flush()
        folds = create_folds(self.X, k=num_folds)
    self.priors = np.zeros((self.nsamples, 2), dtype=float)
    self.models = []
    train_losses, val_losses = (np.zeros((len(folds), random_restarts, num_epochs)),
                                np.zeros((len(folds), random_restarts, num_epochs)))
    epochs_per_fold = np.zeros(len(folds))
    for fold_idx, test_indices in enumerate(folds):
        # Create train/validate splits
        mask = np.ones(self.nsamples, dtype=bool)
        mask[test_indices] = False
        indices = np.arange(self.nsamples, dtype=int)[mask]
        np.random.shuffle(indices)
        train_cutoff = int(np.round(len(indices) * (1 - val_pct)))
        train_indices = indices[:train_cutoff]
        validate_indices = indices[train_cutoff:]
        torch_test_indices = autograd.Variable(torch.LongTensor(test_indices), requires_grad=False)
        best_loss = None

        # Try re-initializing a few times
        for restart in range(random_restarts):
            model = model_fn(self.nfeatures)

            # Setup the optimizers
            # optimizer = optim.SGD(model.parameters(), lr=lr, weight_decay=weight_decay, momentum=momentum)
            # scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=patience)
            optimizer = optim.RMSprop(model.parameters(), lr=lr, weight_decay=weight_decay)
            # optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

            # Train the model
            for epoch in range(num_epochs):
                if verbose:
                    print('\t\tRestart {} Fold {} Epoch {}'.format(
                        restart + 1, fold_idx + 1, epoch + 1))
                    sys.stdout.flush()

                train_loss = torch.Tensor([0])
                for batch_idx, batch in enumerate(
                        batches(train_indices, batch_size, shuffle=False)):
                    if verbose and (batch_idx % 100 == 0):
                        print('\t\t\tBatch {}'.format(batch_idx))
                    tidx = autograd.Variable(torch.LongTensor(batch), requires_grad=False)

                    # Set the model to training mode
                    model.train()

                    # Reset the gradient
                    model.zero_grad()

                    # Run the model and get the prior predictions
                    concentrations = model(self.tX[tidx])

                    # Calculate the loss as the negative log-likelihood of the data
                    # Use a beta prior for the treatment effect
                    prior_dist = torch.distributions.Beta(
                        concentrations[:, 0:1], concentrations[:, 1:2])

                    # Discretize the (0,1) interval to approximate the beta PDF
                    prior_probs = prior_dist.log_prob(tbeta_grid).exp()
                    prior_probs = prior_probs / prior_probs.sum(dim=1, keepdim=True)

                    # Calculate the loss
                    posterior_probs = (((1 - tbeta_grid) * self.tP0[tidx] +
                                        tbeta_grid * self.tP1[tidx]) *
                                       prior_probs).sum(dim=1)
                    loss = -posterior_probs.log().mean()

                    # L1 penalty to shrink c and be more conservative
                    regularized_loss = (loss + lasso * concentrations.mean() +
                                        l2 * (concentrations**2).mean())

                    # Update the model with gradient clipping for stability
                    regularized_loss.backward()

                    # Clip the gradients if need-be
                    if clip_gradients is not None:
                        torch.nn.utils.clip_grad_norm(model.parameters(), clip_gradients)

                    # Apply the update
                    [p for p in model.parameters() if p.requires_grad]
                    optimizer.step()

                    # Track the loss
                    train_loss += loss.data

                validate_loss = torch.Tensor([0])
                for batch_idx, batch in enumerate(
                        batches(validate_indices, batch_size)):
                    if verbose and (batch_idx % 100 == 0):
                        print('\t\t\tValidation Batch {}'.format(batch_idx))
                    tidx = autograd.Variable(torch.LongTensor(batch), requires_grad=False)

                    # Set the model to test mode
                    model.eval()

                    # Reset the gradient
                    model.zero_grad()

                    # Run the model and get the prior predictions
                    concentrations = model(self.tX[tidx])

                    # Calculate the loss as the negative log-likelihood of the data
                    # Use a beta prior for the treatment effect
                    prior_dist = torch.distributions.Beta(
                        concentrations[:, 0:1], concentrations[:, 1:2])

                    # Discretize the (0,1) interval to approximate the beta PDF
                    prior_probs = prior_dist.log_prob(tbeta_grid).exp()
                    prior_probs = (prior_probs / prior_probs.sum(dim=1, keepdim=True)).clamp(1e-8, 1 - 1e-8)

                    # Calculate the loss
                    posterior_probs = (((1 - tbeta_grid) * self.tP0[tidx] +
                                        tbeta_grid * self.tP1[tidx]) *
                                       prior_probs).sum(dim=1).clamp(1e-8, 1 - 1e-8)
                    loss = -posterior_probs.log().sum()

                    # Track the loss
                    validate_loss += loss.data

                train_losses[fold_idx, restart, epoch] = train_loss.numpy() / float(len(train_indices))
                val_losses[fold_idx, restart, epoch] = validate_loss.numpy() / float(len(validate_indices))

                # # Adjust the learning rate down if the validation performance is bad
                # scheduler.step(val_losses[fold_idx, epoch])

                # Check if we currently have the best held-out log-likelihood
                if verbose:
                    print('Validation loss: {} Best: {}'.format(
                        val_losses[fold_idx, restart, epoch], best_loss))
                if (restart == 0 and epoch == 0) or val_losses[fold_idx, restart, epoch] <= best_loss:
                    if verbose:
                        print('\t\t\tSaving test set results. <----- New high water mark for fold {} on epoch {}'.format(fold_idx + 1, epoch + 1))
                    # If so, use the current model on the test set
                    best_loss = val_losses[fold_idx, restart, epoch]
                    epochs_per_fold[fold_idx] = epoch + 1
                    self.priors[test_indices] = model(self.tX[torch_test_indices]).data.numpy()
                    torch.save(model, save_dir + '_fold{}.pt'.format(fold_idx))
                    if verbose:
                        means = self.priors[test_indices, 0] / self.priors[test_indices].sum(axis=1)
                        print('Prior range: [{},{}]'.format(means.min(), means.max()))
                        print('First 3:')
                        print(self.priors[test_indices][:3])

        # Reload the best model
        self.models.append(torch.load(save_dir + '_fold{}.pt'.format(fold_idx)))

    # Calculate the posterior probabilities
    if verbose:
        print('Calculating posteriors.')
        sys.stdout.flush()
    prior_grid = beta.pdf(self.beta_grid, self.priors[:, 0:1], self.priors[:, 1:2])
    prior_grid /= prior_grid.sum(axis=1, keepdims=True)
    post0 = self.P0 * (1 - self.beta_grid)
    post1 = self.P1 * self.beta_grid
    self.posteriors = ((post1 / (post0 + post1)) * prior_grid).sum(axis=1)
    self.posteriors = self.posteriors.clip(1e-8, 1 - 1e-8)

    if verbose:
        print('Calculating predictions at a {:.2f}% FDR threshold'.format(self.fdr * 100))
        sys.stdout.flush()
    self.predictions = calc_fdr(self.posteriors, self.fdr)

    if verbose:
        print('Finished training.')
        sys.stdout.flush()

    self.folds = folds
    return {
        'train_losses': train_losses,
        'validation_losses': val_losses,
        'priors': self.priors,
        'posteriors': self.posteriors,
        'predictions': self.predictions,
        'models': self.models,
        'folds': folds
    }
saver = tf.train.Saver(tf.global_variables(), max_to_keep=100)
writer = tf.summary.FileWriter(logdir + '/train', sess.graph)

if restore:
    chkpt = tf.train.latest_checkpoint(checkpointdir)
    if chkpt:
        print('restoring checkpoint: {}'.format(chkpt))
        saver.restore(sess, chkpt)

prints('Training Begins!')
for epochNum in range(epochs):
    prints('Starting epoch {}'.format(epochNum + 1))
    batchNum = 0
    for batch, labels, denseLabels in utils.batches():
        batchNum += 1
        iteration = (batchSize * (epochNum)) + batchNum
        feed = {
            x: batch,
            y: labels,
            yDense: denseLabels,
            lr: learningRate,
            pkeep: pkeepi,
            pkeepConv: pkeepConvi,
            pkeepLSTM: pkeepLSTMi,
            tst: False
        }
        feedema = {
            x: batch,
def train(self, model_fn, bandwidth=2., kernel_scale=0.35, variance=0.02,
          mvn_train_samples=5, mvn_validate_samples=105,
          validation_samples=1000, validation_burn=1000,
          validation_mcmc_samples=1000, validation_thin=1,
          lr=3e-4, num_epochs=10, batch_size=100, val_pct=0.1,
          nfolds=5, folds=None, learning_rate_decay=0.9, weight_decay=0.,
          clip=None, group_lasso_penalty=0., save_dir='tmp/',
          checkpoint=False, target_fold=None):
    print('\tFitting model using {} folds and training for {} epochs each'.format(nfolds, num_epochs))

    torch_Y = autograd.Variable(torch.FloatTensor(self.Y), requires_grad=False)
    torch_lam_grid = autograd.Variable(torch.FloatTensor(self.lam_grid), requires_grad=False)
    torch_lam_weights = autograd.Variable(torch.FloatTensor(self.lam_weights), requires_grad=False)
    torch_c = autograd.Variable(torch.FloatTensor(self.c[:, np.newaxis, np.newaxis]), requires_grad=False)
    torch_obs = autograd.Variable(torch.FloatTensor(self.obs_mask), requires_grad=False)
    torch_dose_idxs = [autograd.Variable(torch.LongTensor(
        np.arange(d + (d**2 - d) // 2, (d + 1) + ((d + 1)**2 - (d + 1)) // 2)),
        requires_grad=False) for d in range(self.ndoses)]

    # Use a fixed kernel
    Sigma = np.array([kernel_scale * (np.exp(-0.5 * (i - np.arange(self.ndoses))**2 / bandwidth**2))
                      for i in np.arange(self.ndoses)]) + variance * np.eye(self.ndoses)  # squared exponential kernel
    L = np.linalg.cholesky(Sigma)[np.newaxis, np.newaxis, :, :]

    # Use a fixed set of noise draws for validation
    Z = np.random.normal(size=(self.Y_shape[0], mvn_validate_samples, self.ndoses, 1))
    validate_noise = autograd.Variable(torch.FloatTensor(np.matmul(L, Z)[:, :, :, 0]), requires_grad=False)

    self.folds = folds if folds is not None else create_folds(self.Y_shape[0], nfolds)
    nfolds = len(self.folds)
    self.fold_validation_indices = []
    self.prior_mu = np.full(self.Y_shape, np.nan, dtype=float)
    self.prior_Sigma = np.zeros((nfolds, self.ndoses, self.ndoses))
    self.train_losses, self.val_losses = np.zeros((nfolds, num_epochs)), np.zeros((nfolds, num_epochs))
    self.epochs_per_fold = np.zeros(nfolds, dtype=int)
    self.models = [None for _ in range(nfolds)]
    for fold_idx, test_indices in enumerate(self.folds):
        # Create train/validate splits
        mask = np.ones(self.Y_shape[0], dtype=bool)
        mask[test_indices] = False
        indices = np.arange(self.Y_shape[0], dtype=int)[mask]
        np.random.shuffle(indices)
        train_cutoff = int(np.round(len(indices) * (1 - val_pct)))
        train_indices = indices[:train_cutoff]
        validate_indices = indices[train_cutoff:]

        torch_test_indices = autograd.Variable(torch.LongTensor(test_indices), requires_grad=False)
        self.fold_validation_indices.append(validate_indices)

        # If we are only training one specific fold, skip all the rest
        if target_fold is not None and target_fold != fold_idx:
            continue

        if checkpoint:
            self.load_checkpoint(save_dir, fold_idx)

        if self.models[fold_idx] is None:
            self.models[fold_idx] = model_fn()

        model = self.models[fold_idx]

        # Setup the optimizers
        # optimizer = optim.SGD(model.parameters(), lr=lr, weight_decay=weight_decay, momentum=0.9)
        optimizer = optim.RMSprop(model.parameters(), lr=lr, weight_decay=weight_decay)
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=3)

        for epoch in range(self.epochs_per_fold[fold_idx], num_epochs):
            print('\t\tFold {} Epoch {}'.format(fold_idx + 1, epoch + 1))
            train_loss = torch.Tensor([0])
            for batch_idx, batch in enumerate(batches(train_indices, batch_size)):
                if batch_idx % 100 == 0:
                    print('\t\t\tBatch {}'.format(batch_idx))
                    sys.stdout.flush()
                tidx = autograd.Variable(torch.LongTensor(batch), requires_grad=False)
                Z = np.random.normal(size=(len(batch), mvn_train_samples, self.ndoses, 1))
                noise = autograd.Variable(torch.FloatTensor(np.matmul(L, Z)[:, :, :, 0]), requires_grad=False)

                # Set the model to training mode
                model.train()

                # Reset the gradient
                model.zero_grad()

                # Run the model and get the prior predictions
                mu = model(batch, tidx)

                #### Calculate the loss as the negative log-likelihood of the data ####
                # Get the MVN draw as mu + L.T.dot(Z)
                beta = mu.view(-1, 1, self.ndoses) + noise

                # Logistic transform on the log-odds prior sample
                tau = 1 / (1. + (-beta).exp())

                # Poisson noise model for observations
                rates = tau[:, :, :, None] * torch_lam_grid[tidx, None, :, :] + torch_c[tidx, None, :, :]
                likelihoods = torch.distributions.Poisson(rates)

                # Get log probabilities of the data and filter out the missing observations
                loss = -(logsumexp(likelihoods.log_prob(torch_Y[tidx][:, None, :, None])
                                   + torch_lam_weights[tidx][:, None, :, :], dim=-1).mean(dim=1)
                         * torch_obs[tidx]).mean()

                if group_lasso_penalty > 0:
                    loss += group_lasso_penalty * torch.norm(model.cell_line_features.weight, 2, 0).mean()

                # Update the model
                loss.backward()
                if clip is not None:
                    torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
                    for p in model.parameters():
                        p.data.add_(-lr, p.grad.data)
                else:
                    optimizer.step()

                train_loss += loss.data

            validate_loss = torch.Tensor([0])
            for batch_idx, batch in enumerate(batches(validate_indices, batch_size, shuffle=False)):
                if batch_idx % 100 == 0:
                    print('\t\t\tValidation Batch {}'.format(batch_idx))
                    sys.stdout.flush()
                tidx = autograd.Variable(torch.LongTensor(batch), requires_grad=False)
                noise = validate_noise[tidx]

                # Set the model to evaluation mode
                model.eval()

                # Reset the gradient
                model.zero_grad()

                # Run the model and get the prior predictions
                mu = model(batch, tidx)

                #### Calculate the loss as the negative log-likelihood of the data ####
                # Get the MVN draw as mu + L.T.dot(Z)
                beta = mu.view(-1, 1, self.ndoses) + noise

                # Logistic transform on the log-odds prior sample
                tau = 1 / (1. + (-beta).exp())

                # Poisson noise model for observations
                rates = tau[:, :, :, None] * torch_lam_grid[tidx, None, :, :] + torch_c[tidx, None, :, :]
                likelihoods = torch.distributions.Poisson(rates)

                # Get log probabilities of the data and filter out the missing observations
                loss = -(logsumexp(likelihoods.log_prob(torch_Y[tidx][:, None, :, None])
                                   + torch_lam_weights[tidx][:, None, :, :], dim=-1).mean(dim=1)
                         * torch_obs[tidx]).sum()

                validate_loss += loss.data

            self.train_losses[fold_idx, epoch] = train_loss.numpy() / float(len(train_indices))
            self.val_losses[fold_idx, epoch] = validate_loss.numpy() / float(len(validate_indices))

            # Adjust the learning rate down if the validation performance is bad
            scheduler.step(self.val_losses[fold_idx, epoch])

            # Check if we currently have the best held-out log-likelihood
            if epoch == 0 or np.argmin(self.val_losses[fold_idx, :epoch + 1]) == epoch:
                print('\t\t\tNew best score: {}'.format(self.val_losses[fold_idx, epoch]))
                print('\t\t\tSaving test set results.')
                # If so, use the current model on the test set
                mu = model(test_indices, torch_test_indices)
                self.prior_mu[test_indices] = mu.data.numpy()
                self.save_fold(save_dir, fold_idx)

            cur_mu = self.prior_mu[test_indices]
            print('First 10 data points: {}'.format(test_indices[:10]))
            print('First 10 prior means:')
            print(pretty_str(ilogit(cur_mu[:10])))
            print('Prior mean ranges:')
            for dose in range(self.ndoses):
                print('{}: {} [{}, {}]'.format(dose,
                                               ilogit(cur_mu[:, dose].mean()),
                                               np.percentile(ilogit(cur_mu[:, dose]), 5),
                                               np.percentile(ilogit(cur_mu[:, dose]), 95)))
            print('Best model score: {} (epoch {})'.format(
                np.min(self.val_losses[fold_idx, :epoch + 1]),
                np.argmin(self.val_losses[fold_idx, :epoch + 1]) + 1))
            print('Current score: {}'.format(self.val_losses[fold_idx, epoch]))
            print('')

            self.epochs_per_fold[fold_idx] += 1

            # Update the save point if needed
            if checkpoint:
                self.save_checkpoint(save_dir, fold_idx, model)
                sys.stdout.flush()

        # Reload the best model
        tmp = model.cell_features
        self.load_fold(save_dir, fold_idx)
        self.models[fold_idx].cell_features = tmp

        print('Finished fold {}. Estimating covariance matrix using elliptical slice sampler with max {} samples.'.format(fold_idx + 1, validation_samples))
        validate_subset = (np.random.choice(validate_indices, validation_samples, replace=False)
                           if len(validate_indices) > validation_samples else validate_indices)
        tidx = autograd.Variable(torch.LongTensor(validate_subset), requires_grad=False)

        # Set the model to evaluation mode
        self.models[fold_idx].eval()

        # Reset the gradient
        self.models[fold_idx].zero_grad()

        # Run the model and get the prior predictions
        mu_validate = self.models[fold_idx](validate_subset, tidx).data.numpy()

        # Run the slice sampler to get the covariance and data log-likelihoods
        Y_validate = self.Y[validate_subset].astype(int)
        Y_validate[self.obs_mask[validate_subset] == 0] = -1
        (Beta_samples, Sigma_samples, Loglikelihood_samples) = posterior_ess_Sigma(
            Y_validate, mu_validate,
            self.a[validate_subset], self.b[validate_subset], self.c[validate_subset],
            Sigma=Sigma, nburn=validation_burn, nsamples=validation_mcmc_samples,
            nthin=validation_thin, print_freq=1)

        # Save the result
        self.prior_Sigma[fold_idx] = Sigma_samples.mean(axis=0)
        print('Last sample:')
        print(pretty_str(Sigma_samples[-1]))
        print('Mean:')
        print(pretty_str(self.prior_Sigma[fold_idx]))

        if checkpoint:
            self.clean_checkpoint(save_dir, fold_idx)

    print('Finished training.')

    return {'train_losses': self.train_losses,
            'validation_losses': self.val_losses,
            'mu': self.prior_mu,
            'Sigma': self.prior_Sigma,
            'models': self.models}
model.sess.run(tf.assign(model.lr, learning_rate * lr_decay**i))
print("learning_rate:{}".format(model.lr.eval()))
c0, c1, c2 = (model.istate_cell0.c.eval(), model.istate_cell1.c.eval(),
              model.istate_cell2.c.eval())
h0, h1, h2 = (model.istate_cell0.h.eval(), model.istate_cell1.h.eval(),
              model.istate_cell2.h.eval())
kappa = np.zeros((model.batch_size, model.mixture_comps, 1))

for b in range(global_step % no_batches, no_batches):
    a = i * no_batches + b
    if global_step != 0:
        a += 1
        global_step = 0
    if a % save_batches == 0 and (a > 0):
        model.saver.save(model.sess, save_path, global_step=a)
    x, y, s, c = batches(model.batch_size, training_data, Y, sentences,
                         model.char_steps, model.alphabets)
    my_feed_dict = {
        model.input_data: x,
        model.target_Data: y,
        model.char_sequence: c,
        model.init_kappa: kappa,
        model.istate_cell0.c: c0,
        model.istate_cell1.c: c1,
        model.istate_cell2.c: c2,
        model.istate_cell0.h: h0,
        model.istate_cell1.h: h1,
        model.istate_cell2.h: h2
    }
    [training_loss, _] = model.sess.run([model.cost, model.train_ops],
                                        my_feed_dict)
drcf = model.DRCF(EMBEDDING_DIM, RNN_STEP, len(user2id), len(venue2id), SAMPLE_NUM)
drcf = nn.DataParallel(drcf).cuda()
optimizer = optim.Adam(filter(lambda p: p.requires_grad, drcf.parameters()),
                       lr=LEARNING_RATE)
criterion = nn.LogSigmoid()

for i in xrange(EPOCHS):
    # Training
    drcf.train()
    step = 0
    loss = .0
    batch_num = int(len(train) / BATCH_SIZE) + 1
    batches = utils.batches(train, BATCH_SIZE, SAMPLE_NUM, venue_frequency)
    for batch in batches:
        user, candidate, checkins, samples = batch
        input_user = Variable(torch.cuda.LongTensor(user))
        input_candidate = Variable(torch.cuda.LongTensor(candidate))
        input_checkins = Variable(torch.cuda.LongTensor(checkins))
        input_samples = Variable(torch.cuda.LongTensor(samples))

        # Optimizing
        optimizer.zero_grad()
        _loss = -criterion(
            drcf(input_user, input_candidate, input_checkins, input_samples)).sum()
        _loss.backward()
        optimizer.step()
        loss += _loss.cpu().data.numpy()[0]