def test_cropping(cfg):
    """Benchmark the configured cropping algorithm over cfg.MAX_STEPS events.

    Times the two phases separately (patch selection vs. patch extraction)
    and prints the per-event averages at the end.
    """
    if not os.path.isdir(cfg.DISPLAY_DIR):
        os.makedirs(cfg.DISPLAY_DIR)
    _, data = get_data(cfg)
    algorithm = cropping_algorithms[cfg.CROP_ALGO](cfg)
    crop_total, extract_total = 0.0, 0.0
    for _ in range(cfg.MAX_STEPS):
        blob = data.forward()
        print(np.count_nonzero(blob['data']))
        # Phase 1: decide where to crop.
        t0 = time.time()
        patch_centers, patch_sizes = algorithm.crop(blob['voxels'])
        crop_total += time.time() - t0
        # Phase 2: extract the patches from the event blob.
        t0 = time.time()
        batch_blobs, patch_centers, patch_sizes = algorithm.extract(
            patch_centers, patch_sizes, blob)
        extract_total += time.time() - t0
        print("Cropped into %d images" % len(patch_centers))
    crop_total /= cfg.MAX_STEPS
    extract_total /= cfg.MAX_STEPS
    print("Average duration = %f + %f" % (crop_total, extract_total))
def train_demo(cfg, net, criterion, optimizer, lr_scheduler, is_training):
    """Train or evaluate a segmentation network, timing every phase.

    Parameters
    ----------
    cfg : config object; this function reads SPARSE, WEIGHTS_FILE_BASE,
        MAX_STEPS, OUTPUT_DIR and IMAGE_SIZE from it.
    net : torch module; called as ``net(coords, features)`` in sparse mode
        and ``net(image)`` in dense mode.
    criterion : loss callable applied to (predictions, labels).
    optimizer, lr_scheduler : torch optimizer / scheduler; their state is
        restored from the checkpoint when cfg.WEIGHTS_FILE_BASE is set.
    is_training : bool; selects train vs. test data, enables backprop and
        periodic checkpointing.
    """
    # Data generator
    if cfg.SPARSE:
        io = get_data_sparse(cfg, is_training=is_training)
    else:
        train_data, test_data = get_data(cfg)
        if is_training:
            data = train_data
        else:
            data = test_data
    # Initialize the network the right way
    # net.train and net.eval account for differences in dropout/batch norm
    # during training and testing
    start = 0
    if is_training:
        net.train().cuda()
    else:
        net.eval().cuda()
    if cfg.WEIGHTS_FILE_BASE is not None and cfg.WEIGHTS_FILE_BASE != '':
        print('Restoring weights from %s...' % cfg.WEIGHTS_FILE_BASE)
        with open(cfg.WEIGHTS_FILE_BASE, 'rb') as f:
            checkpoint = torch.load(f)
            net.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
            # Resume one epoch past the saved one.
            start = checkpoint['epoch'] + 1
            print('Done.')
        print('Done.')
    # Per-step histories; `durations_*` entries time each phase of a step.
    metrics = {
        'acc_all': [],
        'acc_nonzero': [],
        'loss': [],
        'memory_allocated': [],
        'memory_cached': [],
        'durations_forward': [],
        'durations_backward': [],
        'durations_cuda': [],
        'durations': [],
        'durations_data': []
    }
    # Only enable gradients if we are training
    # with torch.set_grad_enabled(is_training):
    for i in range(cfg.MAX_STEPS):
        # use with torch.no_grad() for test network
        print("Step %d/%d" % (i, cfg.MAX_STEPS))
        start_step = time.time()
        # --- Data loading phase ---
        start = time.time()
        if cfg.SPARSE:
            voxels, features, labels, idx = io.next()
        else:
            blob = data.forward(extract_voxels=False)
            print(blob['data'].shape)
        end = time.time()
        metrics['durations_data'].append(end - start)
        if cfg.SPARSE:
            # --- Host-to-GPU transfer phase (sparse tensors) ---
            start = time.time()
            coords = torch.from_numpy(voxels).cuda()
            features = torch.from_numpy(features).cuda()
            labels = torch.from_numpy(labels).cuda().type(
                torch.cuda.LongTensor)
            end = time.time()
            metrics['durations_cuda'].append(end - start)
            # --- Forward pass ---
            start = time.time()
            predictions_raw = net(coords, features)  # size N_voxels x num_classes
            end = time.time()
            metrics['durations_forward'].append(end - start)
        else:
            print('Getting data to GPU...')
            start = time.time()
            # Dense image: channels-last numpy array -> channels-first torch.
            image = torch.from_numpy(np.moveaxis(blob['data'], -1, 1)).cuda()
            labels = torch.from_numpy(blob['labels']).cuda().type(
                torch.cuda.LongTensor)
            end = time.time()
            print('Done.')
            metrics['durations_cuda'].append(end - start)
            print('Predicting...')
            start = time.time()
            predictions_raw = net(image)
            end = time.time()
            print('Done.')
            metrics['durations_forward'].append(end - start)
        loss = criterion(predictions_raw, labels)
        if is_training:
            # --- Backward pass + parameter update ---
            start = time.time()
            # lr_scheduler.step()  # Decay learning rate
            optimizer.zero_grad()  # Clear previous gradients
            loss.backward()  # Compute gradients of all variables wrt loss
            nn.utils.clip_grad_norm_(net.parameters(), 1.0)  # Clip gradient
            optimizer.step()  # update using computed gradients
            end = time.time()
            metrics['durations_backward'].append(end - start)
        metrics['loss'].append(loss.item())
        print("\tLoss = ", metrics['loss'][-1])
        # Accuracy over all voxels, and restricted to nonzero (non-background)
        # labels, which is the harder / more informative number here.
        predicted_labels = torch.argmax(predictions_raw, dim=1)
        acc_all = (predicted_labels == labels).sum().item() / float(
            labels.numel())
        nonzero_px = labels > 0
        nonzero_prediction = predicted_labels[nonzero_px]
        nonzero_label = labels[nonzero_px]
        acc_nonzero = (nonzero_prediction == nonzero_label).sum().item() / float(
            nonzero_label.numel())
        metrics['acc_all'].append(acc_all)
        metrics['acc_nonzero'].append(acc_nonzero)
        print("\tAccuracy = ", metrics['acc_all'][-1],
              " - Nonzero accuracy = ", metrics['acc_nonzero'][-1])
        metrics['memory_allocated'].append(torch.cuda.max_memory_allocated())
        metrics['memory_cached'].append(torch.cuda.max_memory_cached())
        # Periodic CSV dump of every metric history (also fires at step 0).
        if is_training and i % 1000 == 0:
            for attr in metrics:
                np.savetxt(os.path.join(cfg.OUTPUT_DIR,
                                        '%s_%d.csv' % (attr, i)),
                           metrics[attr], delimiter=',')
        # Periodic checkpoint of network + optimizer + scheduler state.
        if is_training and i % 1000 == 0:
            filename = os.path.join(cfg.OUTPUT_DIR, 'model-%d.ckpt' % i)
            torch.save(
                {
                    'epoch': i,
                    'state_dict': net.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'lr_scheduler': lr_scheduler.state_dict()
                },
                filename)
        # NOTE(review): the inner `if cfg.SPARSE` branch is unreachable under
        # the `not cfg.SPARSE` guard, and `label_voxels` is never defined in
        # this function — dead code; confirm intent before relying on it.
        if not is_training and not cfg.SPARSE:
            print('Display...')
            if cfg.SPARSE:
                final_predictions = np.zeros(
                    (1, cfg.IMAGE_SIZE, cfg.IMAGE_SIZE, cfg.IMAGE_SIZE))
                indices = label_voxels.T
                final_predictions[
                    0, indices[0], indices[1],
                    indices[2]] = predicted_labels.cpu().data.numpy()
                display_uresnet(blob, cfg, index=i,
                                predictions=final_predictions)
            else:
                display_uresnet(
                    blob, cfg, index=i,
                    predictions=predicted_labels.cpu().data.numpy())
            print('Done.')
        end_step = time.time()
        metrics['durations'].append(end_step - start_step)
    # print("Average duration = %f s" % metrics['durations'].mean())
    print(metrics)
    print(np.array(metrics['memory_allocated']).mean())
    print(np.array(metrics['memory_cached']).mean())
def inference(cfg, is_testing=False):
    """
    Inference for either PPN or (xor) base network (e.g. UResNet)

    Runs every event through the network, accumulates UResNet metrics
    (unless is_testing) and appends the predictions to an HDF5 EArray
    intended as a codalab submission file.
    """
    if not os.path.isdir(cfg.DISPLAY_DIR):
        os.makedirs(cfg.DISPLAY_DIR)
    # Test split when is_testing, otherwise the first (train) split.
    if is_testing:
        _, data = get_data(cfg)
    else:
        data, _ = get_data(cfg)
    net = basenets[cfg.BASE_NET](cfg=cfg)
    if cfg.WEIGHTS_FILE_PPN is None and cfg.WEIGHTS_FILE_BASE is None:
        raise Exception("Need a checkpoint file")
    net.init_placeholders()
    net.create_architecture(is_training=False)
    duration = 0
    metrics = UResNetMetrics(cfg)
    # zlib-compressed, shuffled HDF5 storage for the predictions.
    FILTERS = tables.Filters(complevel=5, complib='zlib', shuffle=True,
                             bitshuffle=False, fletcher32=False,
                             least_significant_digit=None)
    # NOTE(review): hard-coded output path and 192^3 volume shape below —
    # presumably tied to a specific challenge dataset; confirm before reuse.
    f_submission = tables.open_file('/data/codalab/submission_5-6.hdf5', 'w',
                                    filters=FILTERS)
    preds_array = f_submission.create_earray('/', 'pred', tables.UInt32Atom(),
                                             (0, 192, 192, 192),
                                             expectedrows=data.n)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        load_weights(cfg, sess)
        for i in range(min(data.n, cfg.MAX_STEPS)):
            print("%d/%d" % (i, data.n))
            blob = data.forward()
            if is_testing:
                # No ground truth available: reuse the data itself as labels.
                blob['labels'] = blob['data'][..., 0]
            start = time.time()
            summary, results = net.test_image(sess, blob)
            end = time.time()
            duration += end - start
            # Drawing time
            # display_uresnet(blob, cfg, index=i, **results)
            if not is_testing:
                metrics.add(blob, results)
            mask = np.where(blob['data'][..., 0] > 0)
            preds = np.reshape(results['predictions'], (1, 192, 192, 192))
            print(np.count_nonzero(preds[mask] > 0))
            # NOTE(review): this zeroes predictions at voxels where the input
            # is NONzero, which looks inverted (one would expect to zero the
            # empty voxels) — confirm against the submission format.
            preds[mask] = 0
            preds_array.append(preds)
            print(preds.shape)
        preds_array.close()
        f_submission.close()
    # NOTE(review): averages over MAX_STEPS even if the loop ran fewer
    # iterations (min(data.n, MAX_STEPS)); label says ms but end-start is s.
    duration /= cfg.MAX_STEPS
    print("Average duration of inference = %f ms" % duration)
    if not is_testing:
        metrics.plot()
def train_demo(cfg, net, criterion, optimizer, lr_scheduler,
               is_training=True, sparse=True):
    """Train or evaluate a (sparse or dense) segmentation network.

    Bug fix: ``is_training`` and ``sparse`` were referenced throughout the
    body but never defined (guaranteed NameError at runtime). They are now
    keyword parameters with defaults, which keeps the original positional
    call signature ``train_demo(cfg, net, criterion, optimizer,
    lr_scheduler)`` working.

    Parameters
    ----------
    cfg : config object; reads WEIGHTS_FILE_BASE, MAX_STEPS, OUTPUT_DIR,
        IMAGE_SIZE.
    net : torch module; called as ``net(coords, features)`` when sparse,
        ``net(image)`` when dense.
    criterion : loss callable applied to (predictions, labels).
    optimizer, lr_scheduler : torch optimizer / scheduler; restored from
        checkpoint when cfg.WEIGHTS_FILE_BASE is set.
    is_training : bool, keyword-only in spirit; enables backprop,
        checkpointing and CSV dumps.
    sparse : bool; selects the sparse (voxel list) input path — the default,
        since only that path defines `label_voxels` used at display time.
    """
    # Data generator
    train_data, test_data = get_data(cfg)
    data = train_data if is_training else test_data

    # net.train()/net.eval() toggle dropout/batch-norm behaviour.
    start_epoch = 0  # kept separate from the timing variables below
    if is_training:
        net.train().cuda()
    else:
        net.eval().cuda()

    if cfg.WEIGHTS_FILE_BASE is not None and cfg.WEIGHTS_FILE_BASE != '':
        print('Restoring weights from %s...' % cfg.WEIGHTS_FILE_BASE)
        with open(cfg.WEIGHTS_FILE_BASE, 'rb') as f:
            checkpoint = torch.load(f)
            net.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
            start_epoch = checkpoint['epoch'] + 1
        print('Done.')

    metrics = {'acc_all': [], 'acc_nonzero': [], 'loss': []}
    durations = []  # forward-pass wall time per step
    for i in range(cfg.MAX_STEPS):
        print("Step %d/%d" % (i, cfg.MAX_STEPS))
        blob = data.forward()
        print(blob['voxels'].shape, blob['voxels_value'].shape,
              blob['data'].shape, blob['labels'].shape)
        if sparse:
            # Sparse path: (N, d) voxel coordinates + (N, 1) feature column.
            coords = torch.from_numpy(blob['voxels']).cuda()
            features = torch.from_numpy(
                np.reshape(blob['voxels_value'], (-1, 1))).cuda()
            t0 = time.time()
            predictions_raw = net(coords, features)  # N_voxels x num_classes
            durations.append(time.time() - t0)
            label_voxels, labels = extract_voxels(blob['labels'])
            labels = torch.from_numpy(labels).cuda().type(
                torch.cuda.LongTensor)
        else:
            # Dense path: channels-last numpy -> channels-first torch tensor.
            image = torch.from_numpy(np.moveaxis(blob['data'], -1, 1)).cuda()
            labels = torch.from_numpy(blob['labels']).cuda().type(
                torch.cuda.LongTensor)
            t0 = time.time()
            predictions_raw = net(image)
            durations.append(time.time() - t0)

        loss = criterion(predictions_raw, labels)
        if is_training:
            lr_scheduler.step()  # Decay learning rate
            optimizer.zero_grad()  # Clear previous gradients
            loss.backward()  # Compute gradients of all variables wrt loss
            nn.utils.clip_grad_norm_(net.parameters(), 1.0)  # Clip gradient
            optimizer.step()  # update using computed gradients
        metrics['loss'].append(loss.item())
        print("\tLoss = ", metrics['loss'][-1])

        # Accuracy over all voxels and over nonzero (non-background) labels.
        predicted_labels = torch.argmax(predictions_raw, dim=1)
        acc_all = (predicted_labels == labels).sum().item() / float(
            labels.numel())
        nonzero_px = labels > 0
        nonzero_prediction = predicted_labels[nonzero_px]
        nonzero_label = labels[nonzero_px]
        acc_nonzero = (nonzero_prediction ==
                       nonzero_label).sum().item() / float(
                           nonzero_label.numel())
        metrics['acc_all'].append(acc_all)
        metrics['acc_nonzero'].append(acc_nonzero)
        print("\tAccuracy = ", metrics['acc_all'][-1],
              " - Nonzero accuracy = ", metrics['acc_nonzero'][-1])

        if is_training and i % 100 == 0:
            # Periodic metrics dump + checkpoint (both fire at step 0 too).
            for attr in metrics:
                np.savetxt(
                    os.path.join(cfg.OUTPUT_DIR, '%s_%d.csv' % (attr, i)),
                    metrics[attr], delimiter=',')
            filename = os.path.join(cfg.OUTPUT_DIR, 'model-%d.ckpt' % i)
            torch.save(
                {
                    'epoch': i,
                    'state_dict': net.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'lr_scheduler': lr_scheduler.state_dict()
                },
                filename)

        if not is_training:
            print('Display...')
            if sparse:
                # Scatter the sparse predictions back into a dense volume.
                final_predictions = np.zeros(
                    (1, cfg.IMAGE_SIZE, cfg.IMAGE_SIZE, cfg.IMAGE_SIZE))
                indices = label_voxels.T
                final_predictions[
                    0, indices[0], indices[1],
                    indices[2]] = predicted_labels.cpu().data.numpy()
                display_uresnet(blob, cfg, index=i,
                                predictions=final_predictions)
            else:
                display_uresnet(
                    blob, cfg, index=i,
                    predictions=predicted_labels.cpu().data.numpy())
            print('Done.')
    print("Average duration = %f s" % np.array(durations).mean())
def test(cfg, net, criterion=None, sparse=True):
    """Evaluate every checkpoint under cfg.WEIGHTS_FILE_BASE on cached data.

    For each ``model-<step>.ckpt`` found, restores the weights, runs all
    pre-fetched blobs through ``net``, and records per-checkpoint mean/std
    of loss, accuracy and phase timings, dumping everything to CSV files
    in cfg.OUTPUT_DIR.

    Bug fixes:
    - ``criterion`` and ``sparse`` were free (undefined) names — now
      backward-compatible keyword parameters; ``criterion`` defaults to
      cross-entropy, ``sparse`` to True (the dense branch cannot work here,
      see FIXME below).
    - With no checkpoints found, the trailing ``np.savetxt`` calls used an
      undefined ``step`` — now guarded with an early return.
    """
    if criterion is None:
        criterion = nn.CrossEntropyLoss()
    _, data = get_data(cfg)
    # net.eval() accounts for dropout/batch-norm differences at test time.
    net.eval().cuda()

    metrics = {'acc_all': [], 'acc_nonzero': [], 'loss': []}
    metrics_mean = {'acc_all': [], 'acc_nonzero': [], 'loss': []}
    metrics_std = {'acc_all': [], 'acc_nonzero': [], 'loss': []}
    durations_mean = {'cuda': [], 'loss': [], 'forward': [], 'acc': []}
    durations_std = {'cuda': [], 'loss': [], 'forward': [], 'acc': []}
    durations, durations_cuda, durations_loss, durations_acc = [], [], [], []
    steps = []

    print('Listing weights...')
    weights = glob.glob(os.path.join(cfg.WEIGHTS_FILE_BASE, "*.ckpt"))
    weights.sort()
    print('Done.')
    if not weights:
        print('No checkpoints found in %s' % cfg.WEIGHTS_FILE_BASE)
        return

    # Pre-fetch all blobs once so every checkpoint sees identical data;
    # dense arrays are dropped to keep memory bounded.
    blobs = []
    print('Fetch data...')
    for i in range(cfg.MAX_STEPS):
        print("%d/%d" % (i, cfg.MAX_STEPS))
        blob = data.forward()
        blob.pop('data')
        blob['label_voxels'], blob['label_values'] = extract_voxels(
            blob['labels'])
        blob.pop('labels')
        blobs.append(blob)
    print('Done.')

    for w in weights:
        step = int(re.findall(r'model-(\d+)', w)[0])
        steps.append(step)
        print('Restoring weights from %s...' % w)
        with open(w, 'rb') as f:
            checkpoint = torch.load(f)
            net.load_state_dict(checkpoint['state_dict'])
        print('Done.')
        for i, blob in enumerate(blobs):
            print("Step %d/%d" % (i, cfg.MAX_STEPS))
            if sparse:
                # Host-to-GPU transfer.
                t0 = time.time()
                coords = torch.from_numpy(blob['voxels']).cuda()
                features = torch.from_numpy(
                    np.reshape(blob['voxels_value'], (-1, 1))).cuda()
                label_voxels, labels = blob['label_voxels'], blob[
                    'label_values']
                labels = torch.from_numpy(labels).cuda().type(
                    torch.cuda.LongTensor)
                durations_cuda.append(time.time() - t0)
                # Forward pass.
                t0 = time.time()
                predictions_raw = net(coords, features)  # N_voxels x classes
                durations.append(time.time() - t0)
            else:
                # FIXME 'data' and 'labels' were popped during pre-fetch, so
                # this dense branch raises KeyError as written; kept for
                # parity with train_demo until the prefetch keeps dense data.
                t0 = time.time()
                image = torch.from_numpy(
                    np.moveaxis(blob['data'], -1, 1)).cuda()
                labels = torch.from_numpy(blob['labels']).cuda().type(
                    torch.cuda.LongTensor)
                durations_cuda.append(time.time() - t0)
                t0 = time.time()
                predictions_raw = net(image)
                durations.append(time.time() - t0)

            t0 = time.time()
            loss = criterion(predictions_raw, labels)
            durations_loss.append(time.time() - t0)
            metrics['loss'].append(loss.item())
            print("\tLoss = ", metrics['loss'][-1])

            # Accuracy over all voxels and over nonzero labels only.
            t0 = time.time()
            predicted_labels = torch.argmax(predictions_raw, dim=1)
            acc_all = (predicted_labels == labels).sum().item() / float(
                labels.numel())
            nonzero_px = labels > 0
            nonzero_prediction = predicted_labels[nonzero_px]
            nonzero_label = labels[nonzero_px]
            acc_nonzero = (nonzero_prediction ==
                           nonzero_label).sum().item() / float(
                               nonzero_label.numel())
            durations_acc.append(time.time() - t0)
            metrics['acc_all'].append(acc_all)
            metrics['acc_nonzero'].append(acc_nonzero)
            print("\tAccuracy = ", metrics['acc_all'][-1],
                  " - Nonzero accuracy = ", metrics['acc_nonzero'][-1])

        # Aggregate this checkpoint's statistics, then reset the buffers.
        for attr in metrics:
            metrics_mean[attr].append(np.array(metrics[attr]).mean())
            metrics_std[attr].append(np.array(metrics[attr]).std())
        for attr, values in (('cuda', durations_cuda),
                             ('loss', durations_loss),
                             ('forward', durations),
                             ('acc', durations_acc)):
            durations_mean[attr].append(np.array(values).mean())
            durations_std[attr].append(np.array(values).std())
        durations, durations_cuda, durations_loss, durations_acc = \
            [], [], [], []
        metrics = {'acc_all': [], 'acc_nonzero': [], 'loss': []}
        print('Mean cuda duration = %f s' % durations_mean['cuda'][-1])
        print('Mean loss duration = %f s' % durations_mean['loss'][-1])
        print('Mean acc duration = %f s' % durations_mean['acc'][-1])
        print('Mean forward duration = %f s' % durations_mean['forward'][-1])
        print('Mean acc = %f s' % metrics_mean['acc_nonzero'][-1])

    # `step` is the last checkpoint's step number; it tags the output files.
    np.savetxt(os.path.join(cfg.OUTPUT_DIR, 'steps_%d.csv' % step), steps,
               delimiter=',')
    for attr in metrics:
        np.savetxt(
            os.path.join(cfg.OUTPUT_DIR, '%s_mean_%d.csv' % (attr, step)),
            metrics_mean[attr], delimiter=',')
        np.savetxt(
            os.path.join(cfg.OUTPUT_DIR, '%s_std_%d.csv' % (attr, step)),
            metrics_std[attr], delimiter=',')
    for attr in durations_mean:
        np.savetxt(
            os.path.join(cfg.OUTPUT_DIR,
                         'durations_%s_mean_%d.csv' % (attr, step)),
            durations_mean[attr], delimiter=',')
        np.savetxt(
            os.path.join(cfg.OUTPUT_DIR,
                         'durations_%s_std_%d.csv' % (attr, step)),
            durations_std[attr], delimiter=',')
def main(cfg):
    """Train the base network with TF1, with optional cropping and profiling.

    Reads DISPLAY_DIR, BASE_NET, PROFILE, PROFILE_NAME, CROP_ALGO,
    ENABLE_CROP and MAX_STEPS from cfg.
    """
    if not os.path.isdir(cfg.DISPLAY_DIR):
        os.makedirs(cfg.DISPLAY_DIR)
    _, data = get_data(cfg)
    # net = PPN(cfg=cfg, base_net=basenets[cfg.BASE_NET])
    net = basenets[cfg.BASE_NET](cfg)
    net.init_placeholders()
    net.create_architecture(is_training=True)
    duration = 0
    if cfg.PROFILE:
        print('WARNING PROFILING ENABLED')
        # Monkey-patch tf.Session.run so every run() call in this process
        # records full-trace metadata into `run_metadata`.
        run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        run_metadata = tf.RunMetadata()
        old_run = tf.Session.run
        new_run = lambda self, fetches, feed_dict=None: old_run(
            self, fetches, feed_dict=feed_dict, options=run_options,
            run_metadata=run_metadata)
        tf.Session.run = new_run
    crop_algorithm = cropping_algorithms[cfg.CROP_ALGO](cfg)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # load_weights(cfg, sess)
        real_step = 0
        for i in range(cfg.MAX_STEPS):
            print("%d/%d" % (i, cfg.MAX_STEPS))
            blob = data.forward()
            # Cropping pre-processing: one event may yield several patches.
            patch_centers, patch_sizes = None, None
            if cfg.ENABLE_CROP:
                batch_blobs, patch_centers, patch_sizes = \
                    crop_algorithm.process(blob)
            else:
                batch_blobs = [blob]
            for j, blob in enumerate(batch_blobs):
                real_step += 1
                feed_dict = {
                    net.image_placeholder: blob['data'],
                    net.pixel_labels_placeholder: blob['labels'],
                    net.learning_rate_placeholder: net.learning_rate,
                    # net.gt_pixels_placeholder: blob['gt_pixels']
                }
                print(j)
                start = time.time()
                # Alternative fetches kept for debugging:
                # summary, results = net.test_image(sess, blob)
                # _ = sess.run([net.last_layer], feed_dict=feed_dict)
                # _ = sess.run([net._predictions['rois']], feed_dict=feed_dict)
                # _ = sess.run([net.rpn_pooling], feed_dict=feed_dict)
                # _ = sess.run([net._predictions['ppn2_proposals']], feed_dict=feed_dict)
                # _ = sess.run([net.before_nms], feed_dict=feed_dict)
                # _ = sess.run([net.after_nms], feed_dict=feed_dict)
                # _ = sess.run([net._predictions['im_proposals']], feed_dict=feed_dict)
                _ = sess.run([net.train_op], feed_dict=feed_dict)
                end = time.time()
                duration += end - start
        # NOTE(review): indentation reconstructed — the profile dump is
        # placed after the training loop (write once); confirm it was not
        # intended to run per step.
        if cfg.PROFILE:
            # Create the Timeline object, and write it to a json
            tl = timeline.Timeline(run_metadata.step_stats)
            ctf = tl.generate_chrome_trace_format()
            with open(cfg.PROFILE_NAME, 'w') as f:
                f.write(ctf)
            print("Wrote timeline to %s" % cfg.PROFILE_NAME)
            # Print to stdout an analysis of the memory usage and the timing
            # information broken down by python codes.
            ProfileOptionBuilder = tf.profiler.ProfileOptionBuilder
            opts = ProfileOptionBuilder(
                ProfileOptionBuilder.time_and_memory()).with_node_names(
                    show_name_regexes=['.*uresnet.*']).build()
            tf.profiler.profile(tf.get_default_graph(),
                                run_meta=run_metadata,
                                cmd='code',
                                options=opts)
            # Print to stdout an analysis of the memory usage and the timing
            # information broken down by operation types.
            tf.profiler.profile(
                tf.get_default_graph(),
                run_meta=run_metadata,
                cmd='op',
                options=tf.profiler.ProfileOptionBuilder.time_and_memory())
    # NOTE(review): label says ms but end-start is measured in seconds.
    duration /= cfg.MAX_STEPS
    print("Average duration of inference = %f ms" % duration)