def main(args):
    # set the number of threads
    num_threads = args.num_threads
    from topaz.torch import set_num_threads
    set_num_threads(num_threads)

    # do denoising
    model = None
    do_train = (args.even_train_path is not None) or (args.odd_train_path is not None)
    if do_train:
        print('# training denoising model!', file=sys.stderr)
        model, num_devices = train_model(args.even_train_path, args.odd_train_path
                                        , args.save_prefix, args.save_interval
                                        , args.device
                                        , base_kernel_width=args.base_kernel_width
                                        , cost_func=args.criteria
                                        , learning_rate=args.lr
                                        , optim=args.optim
                                        , momentum=args.momentum
                                        , minibatch_size=args.batch_size
                                        , num_epochs=args.num_epochs
                                        , N_train=args.N_train
                                        , N_test=args.N_test
                                        , tilesize=args.crop
                                        , num_workers=args.num_workers
                                        )

    if len(args.volumes) > 0: # tomograms to denoise!
        if model is None: # need to load model
            model = load_model(args.model, base_kernel_width=args.base_kernel_width)

        gaussian_sigma = args.gaussian
        if gaussian_sigma > 0:
            print('# apply Gaussian filter postprocessing with sigma={}'.format(gaussian_sigma), file=sys.stderr)
            model = nn.Sequential(model, GaussianDenoise3d(gaussian_sigma))
        model.eval()

        model, use_cuda, num_devices = set_device(model, args.device)

        #batch_size = args.batch_size
        #batch_size *= num_devices
        batch_size = num_devices

        patch_size = args.patch_size
        padding = args.patch_padding
        print('# denoising with patch size={} and padding={}'.format(patch_size, padding), file=sys.stderr)

        # denoise the volumes
        total = len(args.volumes)
        count = 0
        for path in args.volumes:
            count += 1
            denoise(model, path, args.output, args.suffix
                   , patch_size=patch_size
                   , padding=padding
                   , batch_size=batch_size
                   , volume_num=count
                   , total_volumes=total
                   )
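
# Illustrative sketch (not part of topaz): driving the 3D denoising entry point above
# without the CLI. The attribute names mirror the args fields read in main(); the
# values, the 'unet-3d' model name, and the file paths are assumptions for the example.
def _example_denoise3d_call():
    from argparse import Namespace
    args = Namespace(
        num_threads=0,                              # leave the thread count unchanged
        even_train_path=None, odd_train_path=None,  # no training, denoise only
        volumes=['tomo1.mrc'],                      # hypothetical tomogram to denoise
        model='unet-3d', base_kernel_width=11,      # hypothetical pretrained model name
        gaussian=0,                                 # no Gaussian postprocessing
        device=0,                                   # GPU 0 if available
        patch_size=96, patch_padding=48,
        output='denoised/', suffix='',
    )
    main(args)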
def main(args):
    paths = args.files
    dest = args.destdir
    verbose = args.verbose

    scale = args.scale
    affine = args.affine

    num_iters = args.niters
    alpha = args.alpha
    beta = args.beta
    sample = args.sample

    num_workers = args.num_workers
    metadata = args.metadata
    formats = args.format_.split(',')

    # set the number of threads
    num_threads = args.num_threads
    from topaz.torch import set_num_threads
    set_num_threads(num_threads)

    # set CUDA device
    use_cuda = topaz.cuda.set_device(args.device)
    if use_cuda:
        # when using GPU, turn off multiple processes
        num_workers = 0

    if not os.path.exists(dest):
        os.makedirs(dest)

    process = Normalize(dest, scale, affine, num_iters, alpha, beta,
                        sample, metadata, formats, use_cuda)

    if num_workers > 1:
        pool = mp.Pool(num_workers)
        for name in pool.imap_unordered(process, paths):
            if verbose:
                print('# processed:', name, file=sys.stderr)
    else:
        for path in paths:
            name = process(path)
            if verbose:
                print('# processed:', name, file=sys.stderr)
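
# Illustrative sketch (hypothetical _Scale class, not topaz code): the pool dispatch
# above works because `process` is a picklable callable. A module-level class instance,
# unlike a lambda or closure, can be pickled and shipped to multiprocessing workers,
# which is why Normalize is written as a class with __call__ rather than a nested function.
class _Scale:
    def __init__(self, factor):
        self.factor = factor

    def __call__(self, x):
        return x * self.factor


def _example_pool_dispatch():
    # call this under an `if __name__ == '__main__':` guard on spawn-based platforms
    import multiprocessing as mp
    process = _Scale(2.0)
    with mp.Pool(2) as pool:
        for result in pool.imap_unordered(process, [1, 2, 3, 4]):
            print(result)  # results arrive in completion order, not input order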
def main(args):
    verbose = args.verbose

    # set the number of threads
    num_threads = args.num_threads
    from topaz.torch import set_num_threads
    set_num_threads(num_threads)

    ## set the device
    use_cuda = topaz.cuda.set_device(args.device)

    ## load the model
    from topaz.model.factory import load_model
    model = load_model(args.model)
    model.eval()
    model.fill()

    if use_cuda:
        model.cuda()

    ## make output directory if doesn't exist
    destdir = args.destdir
    if not os.path.exists(destdir):
        os.makedirs(destdir)

    ## load the images and process with the model
    for path in args.paths:
        basename = os.path.basename(path)
        image_name = os.path.splitext(basename)[0]
        image = load_image(path)

        ## process image with the model
        with torch.no_grad():
            X = torch.from_numpy(np.array(image, copy=False)).unsqueeze(0).unsqueeze(0)
            if use_cuda:
                X = X.cuda()
            score = model(X).data[0, 0].cpu().numpy()

        im = Image.fromarray(score)
        path = os.path.join(destdir, image_name) + '.tiff'
        if verbose:
            print('# saving:', path)
        im.save(path, 'tiff')
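
# Sketch of the shape handling above: the classifier expects an N x C x H x W batch,
# so a single grayscale image is wrapped with two unsqueeze(0) calls and the per-pixel
# score map is recovered from batch/channel position [0, 0]. Stand-alone illustration;
# the random array stands in for load_image(path).
def _example_image_batching():
    import numpy as np
    import torch
    image = np.random.randn(128, 128).astype(np.float32)
    X = torch.from_numpy(image).unsqueeze(0).unsqueeze(0)  # shape (1, 1, 128, 128)
    score = X[0, 0].numpy()                                # back to (128, 128)
    print(X.shape, score.shape)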
def main(args):
    # set the number of threads
    num_threads = args.num_threads
    from topaz.torch import set_num_threads
    set_num_threads(num_threads)

    ## set the device
    use_cuda = topaz.cuda.set_device(args.device)
    print('# using device={} with cuda={}'.format(args.device, use_cuda), file=sys.stderr)

    cutoff = args.pixel_cutoff # pixel truncation limit

    do_train = (args.dir_a is not None and args.dir_b is not None) or (args.hdf is not None)
    if do_train:
        method = args.method
        paired = (method == 'noise2noise')
        preload = args.preload
        holdout = args.holdout # fraction of image pairs to holdout for validation

        if args.hdf is None: # use dirA/dirB
            crop = args.crop
            dir_as = args.dir_a
            dir_bs = args.dir_b

            dset_train = []
            dset_val = []

            for dir_a, dir_b in zip(dir_as, dir_bs):
                random = np.random.RandomState(44444)
                if paired:
                    dataset_train, dataset_val = make_paired_images_datasets(dir_a, dir_b, crop,
                                                                             random=random,
                                                                             holdout=holdout,
                                                                             preload=preload,
                                                                             cutoff=cutoff)
                else:
                    dataset_train, dataset_val = make_images_datasets(dir_a, dir_b, crop,
                                                                      cutoff=cutoff,
                                                                      random=random,
                                                                      holdout=holdout)
                dset_train.append(dataset_train)
                dset_val.append(dataset_val)

            dataset_train = dset_train[0]
            for i in range(1, len(dset_train)):
                dataset_train.x += dset_train[i].x
                if paired:
                    dataset_train.y += dset_train[i].y

            dataset_val = dset_val[0]
            for i in range(1, len(dset_val)):
                dataset_val.x += dset_val[i].x
                if paired:
                    dataset_val.y += dset_val[i].y

            shuffle = True
        else: # make HDF datasets
            dataset_train, dataset_val = make_hdf5_datasets(args.hdf, paired=paired,
                                                            cutoff=cutoff, holdout=holdout,
                                                            preload=preload)
            shuffle = preload

        # initialize the model
        arch = args.arch
        if arch == 'unet':
            model = dn.UDenoiseNet()
        elif arch == 'unet-small':
            model = dn.UDenoiseNetSmall()
        elif arch == 'unet2':
            model = dn.UDenoiseNet2()
        elif arch == 'unet3':
            model = dn.UDenoiseNet3()
        elif arch == 'fcnet':
            model = dn.DenoiseNet(32)
        elif arch == 'fcnet2':
            model = dn.DenoiseNet2(64)
        elif arch == 'affine':
            model = dn.AffineDenoise()
        else:
            raise Exception('Unknown architecture: ' + arch)

        if use_cuda:
            model = model.cuda()

        # train
        optim = args.optim
        lr = args.lr
        batch_size = args.batch_size
        num_epochs = args.num_epochs
        digits = int(np.ceil(np.log10(num_epochs)))

        num_workers = args.num_workers

        print('epoch', 'loss_train', 'loss_val')
        #criteria = nn.L1Loss()
        criteria = args.criteria

        if method == 'noise2noise':
            iterator = dn.train_noise2noise(model, dataset_train, lr=lr, optim=optim,
                                            batch_size=batch_size, criteria=criteria,
                                            num_epochs=num_epochs, dataset_val=dataset_val,
                                            use_cuda=use_cuda, num_workers=num_workers,
                                            shuffle=shuffle)
        elif method == 'masked':
            iterator = dn.train_mask_denoise(model, dataset_train, lr=lr, optim=optim,
                                             batch_size=batch_size, criteria=criteria,
                                             num_epochs=num_epochs, dataset_val=dataset_val,
                                             use_cuda=use_cuda, num_workers=num_workers,
                                             shuffle=shuffle)

        for epoch, loss_train, loss_val in iterator:
            print(epoch, loss_train, loss_val)
            sys.stdout.flush()

            # save the model
            if args.save_prefix is not None:
                path = args.save_prefix + ('_epoch{:0' + str(digits) + '}.sav').format(epoch)
                #path = args.save_prefix + '_epoch{}.sav'.format(epoch)
                model.cpu()
                model.eval()
                torch.save(model, path)
                if use_cuda:
                    model.cuda()

        models = [model]

    else: # load the saved model(s)
        models = []
        for arg in args.model:
            if arg == 'none':
                print('# Warning: no denoising model will be used', file=sys.stderr)
            else:
                print('# Loading model:', arg, file=sys.stderr)
            model = dn.load_model(arg)
            model.eval()
            if use_cuda:
                model.cuda()
            models.append(model)

    # using trained model
    # denoise the images
    normalize = args.normalize
    if args.format_ == 'png' or args.format_ == 'jpg':
        # always normalize png and jpg format
        normalize = True

    format_ = args.format_
    suffix = args.suffix

    lowpass = args.lowpass
    gaus = args.gaussian
    if gaus > 0:
        gaus = dn.GaussianDenoise(gaus)
        if use_cuda:
            gaus.cuda()
    else:
        gaus = None
    inv_gaus = args.inv_gaussian
    if inv_gaus > 0:
        inv_gaus = dn.InvGaussianFilter(inv_gaus)
        if use_cuda:
            inv_gaus.cuda()
    else:
        inv_gaus = None
    deconvolve = args.deconvolve
    deconv_patch = args.deconv_patch

    ps = args.patch_size
    padding = args.patch_padding

    count = 0

    # we are denoising a single MRC stack
    if args.stack:
        with open(args.micrographs[0], 'rb') as f:
            content = f.read()
        stack, _, _ = mrc.parse(content)
        print('# denoising stack with shape:', stack.shape, file=sys.stderr)
        total = len(stack)

        denoised = np.zeros_like(stack)
        for i in range(len(stack)):
            mic = stack[i]
            # process and denoise the micrograph
            mic = denoise_image(mic, models, lowpass=lowpass, cutoff=cutoff, gaus=gaus,
                                inv_gaus=inv_gaus, deconvolve=deconvolve,
                                deconv_patch=deconv_patch,
                                patch_size=ps, padding=padding, normalize=normalize,
                                use_cuda=use_cuda)
            denoised[i] = mic

            count += 1
            print('# {} of {} completed.'.format(count, total), file=sys.stderr, end='\r')
        print('', file=sys.stderr)

        # write the denoised stack
        path = args.output
        print('# writing', path, file=sys.stderr)
        with open(path, 'wb') as f:
            mrc.write(f, denoised)
    else:
        # stream the micrographs and denoise them
        total = len(args.micrographs)

        # make the output directory if it doesn't exist
        if not os.path.exists(args.output):
            os.makedirs(args.output)

        for path in args.micrographs:
            name, _ = os.path.splitext(os.path.basename(path))
            mic = np.array(load_image(path), copy=False).astype(np.float32)

            # process and denoise the micrograph
            mic = denoise_image(mic, models, lowpass=lowpass, cutoff=cutoff, gaus=gaus,
                                inv_gaus=inv_gaus, deconvolve=deconvolve,
                                deconv_patch=deconv_patch,
                                patch_size=ps, padding=padding, normalize=normalize,
                                use_cuda=use_cuda)

            # write the micrograph
            if not args.output:
                if suffix == '' or suffix is None:
                    suffix = '.denoised'
                # write the file to the same location as input
                no_ext, ext = os.path.splitext(path)
                outpath = no_ext + suffix + '.' + format_
            else:
                outpath = args.output + os.sep + name + suffix + '.' + format_
            save_image(mic, outpath) #, mi=None, ma=None)

            count += 1
            print('# {} of {} completed.'.format(count, total), file=sys.stderr, end='\r')
        print('', file=sys.stderr)
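
# Illustrative sketch only (not topaz's denoise_image): how patch-wise processing with
# padding generally works. Each padded crop is processed independently and the padding
# is trimmed before the result is written back, which hides seams at patch borders.
# With an identity `process`, the output equals the input exactly.
def _example_patch_process(image, process, patch_size=1024, padding=128):
    import numpy as np
    h, w = image.shape
    out = np.zeros_like(image)
    for i in range(0, h, patch_size):
        for j in range(0, w, patch_size):
            # crop with padding, clamped to the image bounds
            i0, j0 = max(0, i - padding), max(0, j - padding)
            i1 = min(h, i + patch_size + padding)
            j1 = min(w, j + patch_size + padding)
            patch = process(image[i0:i1, j0:j1])
            # keep only the unpadded core of the processed patch
            core_h = min(h, i + patch_size) - i
            core_w = min(w, j + patch_size) - j
            out[i:i + core_h, j:j + core_w] = patch[i - i0:i - i0 + core_h,
                                                    j - j0:j - j0 + core_w]
    return out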
def main(args):
    # set the number of threads
    num_threads = args.num_threads
    from topaz.torch import set_num_threads
    set_num_threads(num_threads)

    ## initialize the model
    classifier = make_model(args)

    if args.describe:
        ## only print a description of the model and terminate
        print(classifier)
        sys.exit()

    ## set the device
    """
    use_cuda = False
    if args.device >= 0:
        use_cuda = torch.cuda.is_available()
        if use_cuda:
            torch.cuda.set_device(args.device)
        else:
            print('WARNING: you specified GPU (device={}) but no GPUs were detected. This may mean there is a mismatch between your system CUDA version and your pytorch CUDA version.'.format(args.device), file=sys.stderr)
    """
    use_cuda = topaz.cuda.set_device(args.device)
    report('Using device={} with cuda={}'.format(args.device, use_cuda))

    if use_cuda:
        classifier.cuda()

    ## load the data
    radius = args.radius # number of pixels around coordinates to label as positive
    train_images, train_targets, test_images, test_targets = \
        load_data(args.train_images,
                  args.train_targets,
                  args.test_images,
                  args.test_targets,
                  radius,
                  format_=args.format_,
                  k_fold=args.k_fold,
                  fold=args.fold,
                  cross_validation_seed=args.cross_validation_seed,
                  image_ext=args.image_ext,
                  )
    num_positive_regions, total_regions = report_data_stats(train_images, train_targets,
                                                            test_images, test_targets)

    ## make the training step method
    if args.num_particles > 0:
        expected_num_particles = args.num_particles
        # make this expected particles in training set rather than per micrograph
        num_micrographs = sum(len(images) for images in train_images)
        expected_num_particles *= num_micrographs

        # given the expected number of particles and the radius
        # calculate what pi should be
        # pi = pixels_per_particle*expected_number_of_particles/pixels_in_dataset
        grid = np.linspace(-radius, radius, 2 * radius + 1)
        xx = np.zeros((2 * radius + 1, 2 * radius + 1)) + grid[:, np.newaxis]
        yy = np.zeros((2 * radius + 1, 2 * radius + 1)) + grid[np.newaxis]
        d2 = xx**2 + yy**2
        mask = (d2 <= radius**2).astype(int)
        pixels_per_particle = mask.sum()

        # total_regions is number of regions in the data
        pi = pixels_per_particle * expected_num_particles / total_regions
        report('Specified expected number of particles per micrograph = {}'.format(args.num_particles))
        report('With radius = {}'.format(radius))
        report('Setting pi = {}'.format(pi))
    else:
        pi = args.pi
        report('pi = {}'.format(pi))

    trainer, criteria, split = make_training_step_method(classifier, num_positive_regions,
                                                         num_positive_regions / total_regions,
                                                         lr=args.learning_rate, l2=args.l2,
                                                         method=args.method, pi=pi,
                                                         slack=args.slack,
                                                         autoencoder=args.autoencoder)

    ## training parameters
    train_iterator, test_iterator = make_data_iterators(train_images, train_targets,
                                                        test_images, test_targets,
                                                        classifier.width, split, args)

    ## fit the model, report train/test stats, save model if required
    output = sys.stdout if args.output is None else open(args.output, 'w')
    save_prefix = args.save_prefix
    #if not os.path.exists(os.path.dirname(save_prefix)):
    #    os.makedirs(os.path.dirname(save_prefix))
    fit_epochs(classifier, criteria, trainer, train_iterator, test_iterator, args.num_epochs,
               save_prefix=save_prefix, use_cuda=use_cuda, output=output)

    report('Done!')
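
# Worked example of the pi calculation above, with illustrative numbers. Here the
# dataset size is approximated by raw pixel counts; in main(), total_regions comes
# from report_data_stats and already reflects the labeled region grid.
def _example_pi_calculation():
    import numpy as np
    radius = 3
    expected_num_particles = 100                   # per micrograph (--num-particles)
    num_micrographs = 50
    total_regions = num_micrographs * 4096 * 4096  # e.g. 50 micrographs of 4096 x 4096

    grid = np.linspace(-radius, radius, 2 * radius + 1)
    xx = np.zeros((2 * radius + 1, 2 * radius + 1)) + grid[:, np.newaxis]
    yy = np.zeros((2 * radius + 1, 2 * radius + 1)) + grid[np.newaxis]
    mask = (xx**2 + yy**2 <= radius**2).astype(int)
    pixels_per_particle = mask.sum()               # 29 pixels inside a radius-3 disc

    pi = pixels_per_particle * expected_num_particles * num_micrographs / total_regions
    print(pixels_per_particle, pi)                 # 29, ~1.7e-4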
def main(args):
    # set the number of threads
    num_threads = args.num_threads
    from topaz.torch import set_num_threads
    set_num_threads(num_threads)

    # score the images lazily with a generator
    model = args.model
    device = args.device
    paths = args.paths
    batch_size = args.batch_size

    if len(paths) == 0:
        # no paths specified, so we read them from stdin
        paths = stream_inputs(sys.stdin)

    stream = score_images(model, paths, device=device, batch_size=batch_size)

    # extract coordinates from scored images
    threshold = args.threshold

    radius = args.radius
    if radius is None:
        radius = -1

    num_workers = args.num_workers
    pool = None
    if num_workers < 0:
        num_workers = multiprocessing.cpu_count()
    if num_workers > 0:
        pool = multiprocessing.Pool(num_workers)

    # if no radius is set, we choose the radius based on targets provided
    lo = args.min_radius
    hi = args.max_radius
    step = args.step_radius
    match_radius = args.assignment_radius

    if radius < 0 and args.targets is not None:
        # set the radius to optimize AUPRC of the targets
        scores = {k: v for k, v in stream} # process all images for this part
        stream = scores.items()

        targets = pd.read_csv(args.targets, sep='\t')
        target_scores = {name: scores[name] for name in targets.image_name.unique() if name in scores}

        ## find radius maximizing AUPRC
        radius, auprc = find_opt_radius(targets, target_scores, threshold, lo=lo, hi=hi,
                                        step=step, match_radius=match_radius, pool=pool)

    elif args.targets is not None:
        scores = {k: v for k, v in stream} # process all images for this part
        stream = scores.items()

        targets = pd.read_csv(args.targets, sep='\t')
        target_scores = {name: scores[name] for name in targets.image_name.unique() if name in scores}

        # calculate AUPRC for radius
        au, rmse, recall, n = extract_auprc(targets, target_scores, radius, threshold,
                                            match_radius=match_radius, pool=pool)
        print('# radius={}, auprc={}, rmse={}, recall={}, targets={}'.format(radius, au, rmse, recall, n))

    elif radius < 0:
        # must have targets if radius < 0
        raise Exception('Must specify targets for choosing the extraction radius if extraction radius is not provided')

    # now, extract all particles from scored images
    if not args.only_validate:
        per_micrograph = args.per_micrograph # store one file per micrograph rather than combining all files together
        suffix = args.suffix # optional suffix to add to particle file paths
        out_format = args.format

        f = sys.stdout
        if args.output is not None and not per_micrograph:
            f = open(args.output, 'w')

        scale = args.up_scale / args.down_scale

        if not per_micrograph:
            print('image_name\tx_coord\ty_coord\tscore', file=f)

        ## extract coordinates using radius
        for path, score, coords in nms_iterator(stream, radius, threshold, pool=pool):
            basename = os.path.basename(path)
            name = os.path.splitext(basename)[0]
            ## scale the coordinates
            if scale != 1:
                coords = np.round(coords * scale).astype(int)

            if per_micrograph:
                table = pd.DataFrame({'image_name': name, 'x_coord': coords[:, 0],
                                      'y_coord': coords[:, 1], 'score': score})
                out_path, ext = os.path.splitext(path)
                out_path = out_path + suffix + '.' + out_format
                with open(out_path, 'w') as f:
                    file_utils.write_table(f, table, format=out_format, image_ext=ext)
            else:
                for i in range(len(score)):
                    print(name + '\t' + str(coords[i, 0]) + '\t' + str(coords[i, 1]) + '\t' + str(score[i]), file=f)
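
# Sketch: consuming the combined particle table written above. The file is
# tab-separated with an image_name/x_coord/y_coord/score header; the path and the
# score cutoff used here are only example values, not topaz defaults.
def _example_read_particles(path='particles.txt', cutoff=0.0):
    import pandas as pd
    particles = pd.read_csv(path, sep='\t')
    keep = particles[particles.score > cutoff]
    print(len(particles), 'extracted,', len(keep), 'above cutoff')
    print(keep.groupby('image_name').size().head())  # particles kept per micrograph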
from topaz.torch import set_num_threads


def test_set_num_threads():
    assert set_num_threads(0) == 0
    assert set_num_threads(1) == 1
    assert set_num_threads(-1) > 0
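
# Minimal sketch of a set_num_threads consistent with the assertions above; this is an
# assumption about the behavior, not the actual topaz.torch implementation. Negative
# values resolve to the machine's core count, positive values are applied via torch,
# and the resolved value is returned.
def _sketch_set_num_threads(num_threads):
    import multiprocessing
    import torch
    if num_threads < 0:
        num_threads = multiprocessing.cpu_count()
    if num_threads > 0:
        torch.set_num_threads(num_threads)
    return num_threads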
def main(args):
    # set the number of threads
    num_threads = args.num_threads
    from topaz.torch import set_num_threads
    set_num_threads(num_threads)

    # score the images lazily with a generator
    model = args.model
    device = args.device
    paths = args.paths
    batch_size = args.batch_size
    stream = score_images(model, paths, device=device, batch_size=batch_size)

    # extract coordinates from scored images
    threshold = args.threshold

    radius = args.radius
    if radius is None:
        radius = -1

    num_workers = args.num_workers
    pool = None
    if num_workers < 0:
        num_workers = multiprocessing.cpu_count()
    if num_workers > 0:
        pool = multiprocessing.Pool(num_workers)

    # if no radius is set, we choose the radius based on targets provided
    lo = args.min_radius
    hi = args.max_radius
    step = args.step_radius
    match_radius = args.assignment_radius

    if radius < 0 and args.targets is not None:
        # set the radius to optimize AUPRC of the targets
        scores = {k: v for k, v in stream} # process all images for this part
        stream = scores.items()

        targets = pd.read_csv(args.targets, sep='\t')
        target_scores = {name: scores[name] for name in targets.image_name.unique() if name in scores}

        ## find radius maximizing AUPRC
        radius, auprc = find_opt_radius(targets, target_scores, threshold, lo=lo, hi=hi,
                                        step=step, match_radius=match_radius, pool=pool)
    elif args.targets is not None:
        scores = {k: v for k, v in stream} # process all images for this part
        stream = scores.items()

        targets = pd.read_csv(args.targets, sep='\t')
        target_scores = {name: scores[name] for name in targets.image_name.unique() if name in scores}

        # calculate AUPRC for radius
        au, rmse, recall, n = extract_auprc(targets, target_scores, radius, threshold,
                                            match_radius=match_radius, pool=pool)
        print('# radius={}, auprc={}, rmse={}, recall={}, targets={}'.format(radius, au, rmse, recall, n))
    elif radius < 0:
        # must have targets if radius < 0
        raise Exception('Must specify targets for choosing the extraction radius if extraction radius is not provided')

    # now, extract all particles from scored images
    if not args.only_validate:
        f = sys.stdout
        if args.output is not None:
            f = open(args.output, 'w')

        scale = args.up_scale / args.down_scale

        print('image_name\tx_coord\ty_coord\tscore', file=f)
        ## extract coordinates using radius
        for name, score, coords in nms_iterator(stream, radius, threshold, pool=pool):
            ## scale the coordinates
            if scale != 1:
                coords = np.round(coords * scale).astype(int)
            for i in range(len(score)):
                print(name + '\t' + str(coords[i, 0]) + '\t' + str(coords[i, 1]) + '\t' + str(score[i]), file=f)