Example no. 1
def main(args):
    # set the number of threads
    num_threads = args.num_threads
    from topaz.torch import set_num_threads
    set_num_threads(num_threads)

    # do denoising
    model = None
    do_train = (args.even_train_path is not None) or (args.odd_train_path is not None)
    if do_train:
        print('# training denoising model!', file=sys.stderr)
        model, num_devices = train_model(args.even_train_path, args.odd_train_path
                           , args.save_prefix, args.save_interval
                           , args.device
                           , base_kernel_width=args.base_kernel_width
                           , cost_func=args.criteria
                           , learning_rate=args.lr
                           , optim=args.optim
                           , momentum=args.momentum
                           , minibatch_size=args.batch_size
                           , num_epochs=args.num_epochs
                           , N_train=args.N_train
                           , N_test=args.N_test
                           , tilesize=args.crop
                           , num_workers=args.num_workers
                           )

    if len(args.volumes) > 0: # tomograms to denoise!
        if model is None: # need to load model
            model = load_model(args.model, base_kernel_width=args.base_kernel_width)

        gaussian_sigma = args.gaussian
        if gaussian_sigma > 0:
            print('# apply Gaussian filter postprocessing with sigma={}'.format(gaussian_sigma), file=sys.stderr)
            model = nn.Sequential(model, GaussianDenoise3d(gaussian_sigma))
        model.eval()
        
        model, use_cuda, num_devices = set_device(model, args.device)

        # use one patch per device as the effective batch size
        batch_size = num_devices

        patch_size = args.patch_size
        padding = args.patch_padding
        print('# denoising with patch size={} and padding={}'.format(patch_size, padding), file=sys.stderr)
        # denoise the volumes
        total = len(args.volumes)
        count = 0
        for path in args.volumes:
            count += 1
            denoise(model, path, args.output, args.suffix
                   , patch_size=patch_size
                   , padding=padding
                   , batch_size=batch_size
                   , volume_num=count
                   , total_volumes=total
                   )
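A minimal sketch of driving this entry point programmatically for denoising only (no training). The attribute names mirror what the function reads above; the concrete values and the model identifier are illustrative assumptions, not topaz defaults.

from argparse import Namespace

args = Namespace(
    num_threads=0,              # 0 = leave the torch thread count unchanged (assumption)
    even_train_path=None,       # no training pairs -> skip training
    odd_train_path=None,
    volumes=['tomo01.mrc'],     # hypothetical tomogram to denoise
    model='unet-3d',            # hypothetical pretrained model identifier
    base_kernel_width=11,
    gaussian=0,                 # no Gaussian post-filter
    device=0,                   # GPU index; a negative value for CPU (assumption)
    patch_size=96,
    patch_padding=48,
    output='denoised/',
    suffix='',
)
main(args)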
Example no. 2
def main(args):
    paths = args.files
    dest = args.destdir
    verbose = args.verbose

    scale = args.scale
    affine = args.affine

    num_iters = args.niters
    alpha = args.alpha
    beta = args.beta
    sample = args.sample

    num_workers = args.num_workers
    metadata = args.metadata
    formats = args.format_.split(',')

    # set the number of threads
    num_threads = args.num_threads
    from topaz.torch import set_num_threads
    set_num_threads(num_threads)

    # set CUDA device
    use_cuda = topaz.cuda.set_device(args.device)
    if use_cuda:
        # when using GPU, turn off multiple processes
        num_workers = 0

    if not os.path.exists(dest):
        os.makedirs(dest)

    process = Normalize(dest, scale, affine, num_iters, alpha, beta
                       , sample, metadata, formats, use_cuda)

    if num_workers > 1:
        pool = mp.Pool(num_workers)
        for name in pool.imap_unordered(process, paths):
            if verbose:
                print('# processed:', name, file=sys.stderr)
    else:
        for path in paths:
            name = process(path)
            if verbose:
                print('# processed:', name, file=sys.stderr)
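Normalize is used here as a picklable callable: a plain class instance (rather than a closure or lambda), so multiprocessing can ship it to worker processes, and imap_unordered yields names as workers finish. A self-contained sketch of the same pattern, with illustrative names only:

import multiprocessing as mp

class Process:
    """Picklable callable: all state lives in instance attributes."""
    def __init__(self, dest):
        self.dest = dest

    def __call__(self, path):
        # ... per-file work would go here ...
        return path  # return something the parent can report

if __name__ == '__main__':
    process = Process('out/')
    with mp.Pool(2) as pool:
        for name in pool.imap_unordered(process, ['a.mrc', 'b.mrc']):
            print('# processed:', name)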
Example no. 3
def main(args):
    verbose = args.verbose

    # set the number of threads
    num_threads = args.num_threads
    from topaz.torch import set_num_threads
    set_num_threads(num_threads)

    ## set the device
    use_cuda = topaz.cuda.set_device(args.device)

    ## load the model
    from topaz.model.factory import load_model
    model = load_model(args.model)
    model.eval()
    model.fill()

    if use_cuda:
        model.cuda()

    ## make output directory if doesn't exist
    destdir = args.destdir
    if not os.path.exists(destdir):
        os.makedirs(destdir)

    ## load the images and process with the model
    for path in args.paths:
        basename = os.path.basename(path)
        image_name = os.path.splitext(basename)[0]
        image = load_image(path)

        ## process image with the model
        with torch.no_grad():
            # add batch and channel dimensions: (H, W) -> (1, 1, H, W)
            X = torch.from_numpy(np.array(
                image, copy=False)).unsqueeze(0).unsqueeze(0)
            if use_cuda:
                X = X.cuda()
            score = model(X).data[0, 0].cpu().numpy()

        im = Image.fromarray(score)
        out_path = os.path.join(destdir, image_name) + '.tiff'
        if verbose:
            print('# saving:', out_path)
        im.save(out_path, 'tiff')
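The per-pixel scores are written as 32-bit float TIFFs (PIL mode 'F'), which formats like PNG or JPEG cannot represent, so they can be read back losslessly. For example, with a hypothetical output file:

from PIL import Image
import numpy as np

score = np.array(Image.open('micrograph01.tiff'))  # float32 array of per-pixel scores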
Example no. 4
def main(args):

    # set the number of threads
    num_threads = args.num_threads
    from topaz.torch import set_num_threads
    set_num_threads(num_threads)

    ## set the device
    use_cuda = topaz.cuda.set_device(args.device)
    print('# using device={} with cuda={}'.format(args.device, use_cuda),
          file=sys.stderr)

    cutoff = args.pixel_cutoff  # pixel truncation limit

    do_train = (args.dir_a is not None
                and args.dir_b is not None) or (args.hdf is not None)
    if do_train:

        method = args.method
        paired = (method == 'noise2noise')
        preload = args.preload
        holdout = args.holdout  # fraction of image pairs to holdout for validation

        if args.hdf is None:  #use dirA/dirB
            crop = args.crop
            dir_as = args.dir_a
            dir_bs = args.dir_b

            dset_train = []
            dset_val = []

            for dir_a, dir_b in zip(dir_as, dir_bs):
                random = np.random.RandomState(44444)
                if paired:
                    dataset_train, dataset_val = make_paired_images_datasets(
                        dir_a,
                        dir_b,
                        crop,
                        random=random,
                        holdout=holdout,
                        preload=preload,
                        cutoff=cutoff)
                else:
                    dataset_train, dataset_val = make_images_datasets(
                        dir_a,
                        dir_b,
                        crop,
                        cutoff=cutoff,
                        random=random,
                        holdout=holdout)
                dset_train.append(dataset_train)
                dset_val.append(dataset_val)

            dataset_train = dset_train[0]
            for i in range(1, len(dset_train)):
                dataset_train.x += dset_train[i].x
                if paired:
                    dataset_train.y += dset_train[i].y

            dataset_val = dset_val[0]
            for i in range(1, len(dset_val)):
                dataset_val.x += dset_val[i].x
                if paired:
                    dataset_val.y += dset_val[i].y

            shuffle = True
        else:  # make HDF datasets
            dataset_train, dataset_val = make_hdf5_datasets(args.hdf,
                                                            paired=paired,
                                                            cutoff=cutoff,
                                                            holdout=holdout,
                                                            preload=preload)
            shuffle = preload

        # initialize the model
        arch = args.arch
        if arch == 'unet':
            model = dn.UDenoiseNet()
        elif arch == 'unet-small':
            model = dn.UDenoiseNetSmall()
        elif arch == 'unet2':
            model = dn.UDenoiseNet2()
        elif arch == 'unet3':
            model = dn.UDenoiseNet3()
        elif arch == 'fcnet':
            model = dn.DenoiseNet(32)
        elif arch == 'fcnet2':
            model = dn.DenoiseNet2(64)
        elif arch == 'affine':
            model = dn.AffineDenoise()
        else:
            raise Exception('Unknown architecture: ' + arch)

        if use_cuda:
            model = model.cuda()

        # train
        optim = args.optim
        lr = args.lr
        batch_size = args.batch_size
        num_epochs = args.num_epochs
        # zero-pad epoch numbers in checkpoint names, e.g. num_epochs=250 -> 'prefix_epoch007.sav'
        digits = int(np.ceil(np.log10(num_epochs)))

        num_workers = args.num_workers

        print('epoch', 'loss_train', 'loss_val')
        criteria = args.criteria

        if method == 'noise2noise':
            iterator = dn.train_noise2noise(model,
                                            dataset_train,
                                            lr=lr,
                                            optim=optim,
                                            batch_size=batch_size,
                                            criteria=criteria,
                                            num_epochs=num_epochs,
                                            dataset_val=dataset_val,
                                            use_cuda=use_cuda,
                                            num_workers=num_workers,
                                            shuffle=shuffle)
        elif method == 'masked':
            iterator = dn.train_mask_denoise(model,
                                             dataset_train,
                                             lr=lr,
                                             optim=optim,
                                             batch_size=batch_size,
                                             criteria=criteria,
                                             num_epochs=num_epochs,
                                             dataset_val=dataset_val,
                                             use_cuda=use_cuda,
                                             num_workers=num_workers,
                                             shuffle=shuffle)
        else:
            raise ValueError('Unknown method: ' + method)

        for epoch, loss_train, loss_val in iterator:
            print(epoch, loss_train, loss_val)
            sys.stdout.flush()

            # save the model
            if args.save_prefix is not None:
                path = args.save_prefix + ('_epoch{:0' + str(digits) +
                                           '}.sav').format(epoch)
                model.cpu()
                model.eval()
                torch.save(model, path)
                if use_cuda:
                    model.cuda()

        models = [model]

    else:  # load the saved model(s)
        models = []
        for arg in args.model:
            if arg == 'none':
                print('# Warning: no denoising model will be used',
                      file=sys.stderr)
            else:
                print('# Loading model:', arg, file=sys.stderr)
            model = dn.load_model(arg)

            model.eval()
            if use_cuda:
                model.cuda()

            models.append(model)

    # using trained model
    # denoise the images

    normalize = args.normalize
    if args.format_ == 'png' or args.format_ == 'jpg':
        # always normalize png and jpg format
        normalize = True

    format_ = args.format_
    suffix = args.suffix

    lowpass = args.lowpass
    gaus = args.gaussian
    if gaus > 0:
        gaus = dn.GaussianDenoise(gaus)
        if use_cuda:
            gaus.cuda()
    else:
        gaus = None
    inv_gaus = args.inv_gaussian
    if inv_gaus > 0:
        inv_gaus = dn.InvGaussianFilter(inv_gaus)
        if use_cuda:
            inv_gaus.cuda()
    else:
        inv_gaus = None
    deconvolve = args.deconvolve
    deconv_patch = args.deconv_patch

    ps = args.patch_size
    padding = args.patch_padding

    count = 0

    # we are denoising a single MRC stack
    if args.stack:
        with open(args.micrographs[0], 'rb') as f:
            content = f.read()
        stack, _, _ = mrc.parse(content)
        print('# denoising stack with shape:', stack.shape, file=sys.stderr)
        total = len(stack)

        denoised = np.zeros_like(stack)
        for i in range(len(stack)):
            mic = stack[i]
            # process and denoise the micrograph
            mic = denoise_image(mic,
                                models,
                                lowpass=lowpass,
                                cutoff=cutoff,
                                gaus=gaus,
                                inv_gaus=inv_gaus,
                                deconvolve=deconvolve,
                                deconv_patch=deconv_patch,
                                patch_size=ps,
                                padding=padding,
                                normalize=normalize,
                                use_cuda=use_cuda)
            denoised[i] = mic

            count += 1
            print('# {} of {} completed.'.format(count, total),
                  file=sys.stderr,
                  end='\r')

        print('', file=sys.stderr)
        # write the denoised stack
        path = args.output
        print('# writing', path, file=sys.stderr)
        with open(path, 'wb') as f:
            mrc.write(f, denoised)

    else:
        # stream the micrographs and denoise them
        total = len(args.micrographs)

        # make the output directory if it doesn't exist
        if args.output and not os.path.exists(args.output):
            os.makedirs(args.output)

        for path in args.micrographs:
            name, _ = os.path.splitext(os.path.basename(path))
            mic = np.array(load_image(path), copy=False).astype(np.float32)

            # process and denoise the micrograph
            mic = denoise_image(mic,
                                models,
                                lowpass=lowpass,
                                cutoff=cutoff,
                                gaus=gaus,
                                inv_gaus=inv_gaus,
                                deconvolve=deconvolve,
                                deconv_patch=deconv_patch,
                                patch_size=ps,
                                padding=padding,
                                normalize=normalize,
                                use_cuda=use_cuda)

            # write the micrograph
            if not args.output:
                if suffix == '' or suffix is None:
                    suffix = '.denoised'
                # write the file to the same location as input
                no_ext, ext = os.path.splitext(path)
                outpath = no_ext + suffix + '.' + format_
            else:
                outpath = args.output + os.sep + name + suffix + '.' + format_
            save_image(mic, outpath)  #, mi=None, ma=None)

            count += 1
            print('# {} of {} completed.'.format(count, total),
                  file=sys.stderr,
                  end='\r')
        print('', file=sys.stderr)
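For reference, the output naming used in the streaming branch above works like this (hypothetical values; this is the case where args.output is empty and the default suffix applies):

import os

path, suffix, format_ = 'data/mic_001.mrc', '.denoised', 'png'
no_ext, ext = os.path.splitext(path)
print(no_ext + suffix + '.' + format_)  # data/mic_001.denoised.png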
Example no. 5
def main(args):
    # set the number of threads
    num_threads = args.num_threads
    from topaz.torch import set_num_threads
    set_num_threads(num_threads)

    ## initialize the model
    classifier = make_model(args)

    if args.describe:
        ## only print a description of the model and terminate
        print(classifier)
        sys.exit()

    ## set the device
    """
    use_cuda = False
    if args.device >= 0:
        use_cuda = torch.cuda.is_available()
        if use_cuda:
            torch.cuda.set_device(args.device)
        else:
            print('WARNING: you specified GPU (device={}) but no GPUs were detected. This may mean there is a mismatch between your system CUDA version and your pytorch CUDA version.'.format(args.device), file=sys.stderr)
    """

    use_cuda = topaz.cuda.set_device(args.device)
    report('Using device={} with cuda={}'.format(args.device, use_cuda))

    if use_cuda:
        classifier.cuda()

    ## load the data
    radius = args.radius  # number of pixels around coordinates to label as positive
    train_images, train_targets, test_images, test_targets = \
            load_data(args.train_images,
                      args.train_targets,
                      args.test_images,
                      args.test_targets,
                      radius,
                      format_=args.format_,
                      k_fold=args.k_fold,
                      fold=args.fold,
                      cross_validation_seed=args.cross_validation_seed,
                      image_ext=args.image_ext
                     )
    num_positive_regions, total_regions = report_data_stats(
        train_images, train_targets, test_images, test_targets)

    ## make the training step method
    if args.num_particles > 0:
        expected_num_particles = args.num_particles
        # convert to the expected number of particles in the whole training set rather than per micrograph
        num_micrographs = sum(len(images) for images in train_images)
        expected_num_particles *= num_micrographs

        # given the expected number of particles and the radius
        # calculate what pi should be
        # pi = pixels_per_particle*expected_number_of_particles/pixels_in_dataset
        grid = np.linspace(-radius, radius, 2 * radius + 1)
        xx = np.zeros((2 * radius + 1, 2 * radius + 1)) + grid[:, np.newaxis]
        yy = np.zeros((2 * radius + 1, 2 * radius + 1)) + grid[np.newaxis]
        d2 = xx**2 + yy**2
        mask = (d2 <= radius**2).astype(int)
        pixels_per_particle = mask.sum()

        # total_regions is number of regions in the data
        pi = pixels_per_particle * expected_num_particles / total_regions
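        # worked example with hypothetical numbers: radius=3 gives a 29-pixel disk;
        # 100 expected particles per micrograph over 500 micrographs with
        # total_regions = 4096*4096*500 pixels gives
        # pi = 29 * 100 * 500 / (4096*4096*500) ~= 1.7e-4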

        report(
            'Specified expected number of particles per micrograph = {}'.format(
                args.num_particles))
        report('With radius = {}'.format(radius))
        report('Setting pi = {}'.format(pi))
    else:
        pi = args.pi
        report('pi = {}'.format(pi))

    trainer, criteria, split = make_training_step_method(
        classifier,
        num_positive_regions,
        num_positive_regions / total_regions,
        lr=args.learning_rate,
        l2=args.l2,
        method=args.method,
        pi=pi,
        slack=args.slack,
        autoencoder=args.autoencoder)

    ## training parameters
    train_iterator, test_iterator = make_data_iterators(
        train_images, train_targets, test_images, test_targets,
        classifier.width, split, args)

    ## fit the model, report train/test stats, save model if required
    output = sys.stdout if args.output is None else open(args.output, 'w')
    save_prefix = args.save_prefix
    fit_epochs(classifier,
               criteria,
               trainer,
               train_iterator,
               test_iterator,
               args.num_epochs,
               save_prefix=save_prefix,
               use_cuda=use_cuda,
               output=output)

    report('Done!')
Example no. 6
def main(args):
    # set the number of threads
    num_threads = args.num_threads
    from topaz.torch import set_num_threads
    set_num_threads(num_threads)

    # score the images lazily with a generator
    model = args.model
    device = args.device
    paths = args.paths
    batch_size = args.batch_size

    if len(paths) == 0: # no paths specified, so we read them from stdin
        paths = stream_inputs(sys.stdin)

    stream = score_images(model, paths, device=device, batch_size=batch_size)

    # extract coordinates from scored images
    threshold = args.threshold

    radius = args.radius
    if radius is None:
        radius = -1

    num_workers = args.num_workers
    pool = None
    if num_workers < 0:
        num_workers = multiprocessing.cpu_count()
    if num_workers > 0:
        pool = multiprocessing.Pool(num_workers)

    # if no radius is set, we choose the radius based on targets provided
    lo = args.min_radius
    hi = args.max_radius
    step = args.step_radius
    match_radius = args.assignment_radius

    if radius < 0 and args.targets is not None: # set the radius to optimize AUPRC of the targets
        scores = {k:v for k,v in stream} # process all images for this part
        stream = scores.items()

        targets = pd.read_csv(args.targets, sep='\t')
        target_scores = {name: scores[name] for name in targets.image_name.unique() if name in scores}
        ## find radius maximizing AUPRC
        radius, auprc = find_opt_radius(targets, target_scores, threshold, lo=lo, hi=hi, step=step
                                       , match_radius=match_radius, pool=pool)


    elif args.targets is not None:
        scores = {k:v for k,v in stream} # process all images for this part
        stream = scores.items()

        targets = pd.read_csv(args.targets, sep='\t')
        target_scores = {name: scores[name] for name in targets.image_name.unique() if name in scores}
        # calculate AUPRC for radius
        au, rmse, recall, n = extract_auprc(targets, target_scores, radius, threshold
                                           , match_radius=match_radius, pool=pool)
        print('# radius={}, auprc={}, rmse={}, recall={}, targets={}'.format(radius, au, rmse, recall, n))
    elif radius < 0:
        # must have targets if radius < 0
        raise Exception('Must specify targets for choosing the extraction radius if extraction radius is not provided')


    # now, extract all particles from scored images
    if not args.only_validate:
        per_micrograph = args.per_micrograph # store one file per micrograph rather than combining all files together
        suffix = args.suffix # optional suffix to add to particle file paths
        out_format = args.format

        f = sys.stdout
        if args.output is not None and not per_micrograph:
            f = open(args.output, 'w')

        scale = args.up_scale/args.down_scale

        if not per_micrograph:
            print('image_name\tx_coord\ty_coord\tscore', file=f)
        ## extract coordinates using radius 
        for path,score,coords in nms_iterator(stream, radius, threshold, pool=pool):
            basename = os.path.basename(path)
            name = os.path.splitext(basename)[0]
            ## scale the coordinates
            if scale != 1:
                coords = np.round(coords*scale).astype(int)

            if per_micrograph:
                table = pd.DataFrame({'image_name': name, 'x_coord': coords[:,0], 'y_coord': coords[:,1], 'score': score})
                out_path,ext = os.path.splitext(path)
                out_path = out_path + suffix + '.' + out_format
                with open(out_path, 'w') as out:
                    file_utils.write_table(out, table, format=out_format, image_ext=ext)
            else:
                for i in range(len(score)):
                    print(name + '\t' + str(coords[i,0]) + '\t' + str(coords[i,1]) + '\t' + str(score[i]), file=f)
Example no. 7
def test_set_num_threads():
    assert set_num_threads(0) == 0
    assert set_num_threads(1) == 1
    assert set_num_threads(-1) > 0
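A minimal implementation consistent with these assertions might look like the sketch below; this is an assumption about topaz.torch.set_num_threads, not necessarily its actual code.

import multiprocessing

import torch

def set_num_threads(num_threads):
    # negative -> use all available cores; 0 -> leave the torch default untouched
    if num_threads < 0:
        num_threads = multiprocessing.cpu_count()
    if num_threads > 0:
        torch.set_num_threads(num_threads)
    return num_threads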
Example no. 8
def main(args):
    # set the number of threads
    num_threads = args.num_threads
    from topaz.torch import set_num_threads
    set_num_threads(num_threads)

    # score the images lazily with a generator
    model = args.model
    device = args.device
    paths = args.paths
    batch_size = args.batch_size

    stream = score_images(model, paths, device=device, batch_size=batch_size)

    # extract coordinates from scored images
    threshold = args.threshold

    radius = args.radius
    if radius is None:
        radius = -1

    num_workers = args.num_workers
    pool = None
    if num_workers < 0:
        num_workers = multiprocessing.cpu_count()
    if num_workers > 0:
        pool = multiprocessing.Pool(num_workers)

    # if no radius is set, we choose the radius based on targets provided
    lo = args.min_radius
    hi = args.max_radius
    step = args.step_radius
    match_radius = args.assignment_radius

    if radius < 0 and args.targets is not None:  # set the radius to optimize AUPRC of the targets
        scores = {k: v for k, v in stream}  # process all images for this part
        stream = scores.items()

        targets = pd.read_csv(args.targets, sep='\t')
        target_scores = {
            name: scores[name]
            for name in targets.image_name.unique() if name in scores
        }
        ## find radius maximizing AUPRC
        radius, auprc = find_opt_radius(targets,
                                        target_scores,
                                        threshold,
                                        lo=lo,
                                        hi=hi,
                                        step=step,
                                        match_radius=match_radius,
                                        pool=pool)

    elif args.targets is not None:
        scores = {k: v for k, v in stream}  # process all images for this part
        stream = scores.items()

        targets = pd.read_csv(args.targets, sep='\t')
        target_scores = {
            name: scores[name]
            for name in targets.image_name.unique() if name in scores
        }
        # calculate AUPRC for radius
        au, rmse, recall, n = extract_auprc(targets,
                                            target_scores,
                                            radius,
                                            threshold,
                                            match_radius=match_radius,
                                            pool=pool)
        print('# radius={}, auprc={}, rmse={}, recall={}, targets={}'.format(
            radius, au, rmse, recall, n))
    elif radius < 0:
        # must have targets if radius < 0
        raise Exception(
            'Must specify targets for choosing the extraction radius if extraction radius is not provided'
        )

    # now, extract all particles from scored images
    if not args.only_validate:

        f = sys.stdout
        if args.output is not None:
            f = open(args.output, 'w')

        scale = args.up_scale / args.down_scale

        print('image_name\tx_coord\ty_coord\tscore', file=f)
        ## extract coordinates using radius
        for name, score, coords in nms_iterator(stream,
                                                radius,
                                                threshold,
                                                pool=pool):
            ## scale the coordinates
            if scale != 1:
                coords = np.round(coords * scale).astype(int)
            for i in range(len(score)):
                print(name + '\t' + str(coords[i, 0]) + '\t' +
                      str(coords[i, 1]) + '\t' + str(score[i]),
                      file=f)
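The combined coordinate output above is a tab-separated table with columns image_name, x_coord, y_coord, and score, so it can be read back directly. For example, assuming the output was written to a hypothetical particles.txt:

import pandas as pd

coords = pd.read_csv('particles.txt', sep='\t')
print(coords.columns.tolist())  # ['image_name', 'x_coord', 'y_coord', 'score']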