Example #1
def main(args):
    dfs = []
    for path in args.paths:
        coords = pd.read_csv(path, sep='\t')
        dfs.append(coords)
    coords = pd.concat(dfs, axis=0)

    coords = coords.drop_duplicates()
    print(len(coords))

    if not os.path.exists(args.destdir):
        os.makedirs(args.destdir)

    invert_y = args.invert_y

    for image_name, group in coords.groupby('image_name'):
        path = args.destdir + '/' + image_name + '_info.json'

        shape = None
        if invert_y:
            impath = os.path.join(args.imagedir,
                                  image_name) + '.' + args.image_ext
            # use glob in case image_ext is '*'
            impath = glob.glob(impath)[0]
            im = load_image(impath)
            shape = (im.height, im.width)

        xy = group[['x_coord', 'y_coord']].values.astype(int)
        boxes = coordinates_to_eman2_json(xy, shape=shape, invert_y=invert_y)

        with open(path, 'w') as f:
            json.dump({'boxes': boxes}, f, indent=0)
Example #2
    def __call__(self, path):
        # load the image
        x = np.array(load_image(path), copy=False).astype(np.float32)

        if self.scale > 1:
            x = downsample(x, self.scale)

        # normalize it
        method = 'gmm'
        if self.affine:
            method = 'affine'
        x, metadata = normalize(x, alpha=self.alpha, beta=self.beta,
                                num_iters=self.num_iters, method=method,
                                sample=self.sample, use_cuda=self.use_cuda)

        # save the image and the metadata
        name,_ = os.path.splitext(os.path.basename(path))
        base = os.path.join(self.dest, name)
        for f in self.formats:
            save_image(x, base, f=f)

        if self.metadata:
            # save the metadata in json format
            mdpath = base + '.metadata.json'
            if not self.affine:
                metadata['mus'] = metadata['mus'].tolist()
                metadata['stds'] = metadata['stds'].tolist()
                metadata['pis'] = metadata['pis'].tolist()
                metadata['logps'] = metadata['logps'].tolist()
            with open(mdpath, 'w') as f:
                json.dump(metadata, f, indent=4)

        return name
Example #3
def load_images(paths):
    images = {}
    for path in paths:
        name = os.path.splitext(os.path.basename(path))[0]
        im = load_image(path)
        images[name] = np.array(im, copy=False)
    return images
Example #4
 def load_image(self, path):
     x = np.array(load_image(path), copy=False)
     mu = x.mean()
     std = x.std()
     x = (x - mu) / std
     if self.cutoff > 0:
         x[(x < -self.cutoff) | (x > self.cutoff)] = 0
     return x
Example #5
 def load_image(self, path):
     x = np.array(load_image(path), copy=False)
     x = x.astype(np.float32)  # make sure dtype is single precision
     mu = x.mean()
     std = x.std()
     x = (x - mu) / std
     if self.cutoff > 0:
         x[(x < -self.cutoff) | (x > self.cutoff)] = 0
     return x
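Note: a minimal, self-contained NumPy sketch of the standardize-and-clip step used above (the toy array and the cutoff value are assumptions, not part of the original example):

import numpy as np

x = np.random.randn(64, 64).astype(np.float32) * 3.0 + 10.0  # toy stand-in for a loaded micrograph
x = (x - x.mean()) / x.std()           # zero mean, unit variance
cutoff = 3.0                           # hypothetical truncation limit
x[(x < -cutoff) | (x > cutoff)] = 0    # zero out extreme (outlier) pixels
print(float(x.mean()), float(x.std()))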
Example #6
def main(args):
    verbose = args.verbose

    # set the number of threads
    num_threads = args.num_threads
    from topaz.torch import set_num_threads
    set_num_threads(num_threads)

    ## set the device
    use_cuda = topaz.cuda.set_device(args.device)

    ## load the model
    from topaz.model.factory import load_model
    model = load_model(args.model)
    model.eval()
    model.fill()

    if use_cuda:
        model.cuda()

    ## make output directory if it doesn't exist
    destdir = args.destdir
    if not os.path.exists(destdir):
        os.makedirs(destdir)

    ## load the images and process with the model
    for path in args.paths:
        basename = os.path.basename(path)
        image_name = os.path.splitext(basename)[0]
        image = load_image(path)

        ## process image with the model
        with torch.no_grad():
            X = torch.from_numpy(np.array(
                image, copy=False)).unsqueeze(0).unsqueeze(0)
            if use_cuda:
                X = X.cuda()
            score = model(X).data[0, 0].cpu().numpy()

        im = Image.fromarray(score)
        path = os.path.join(destdir, image_name) + '.tiff'
        if verbose:
            print('# saving:', path)
        im.save(path, 'tiff')
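Note: saving the float32 score map as TIFF works because PIL supports 32-bit floating-point ('F' mode) TIFF images; a minimal round-trip sketch with a hypothetical file name:

import numpy as np
from PIL import Image

score = np.random.rand(32, 32).astype(np.float32)  # stand-in for a model output
Image.fromarray(score).save('score.tiff', 'tiff')   # written as a float32 TIFF
back = np.array(Image.open('score.tiff'))
assert back.dtype == np.float32 and back.shape == score.shape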
Example #7
def main(args):
    ## load image
    path = args.file
    im = load_image(path)
    # convert PIL image to array
    im = np.array(im, copy=False).astype(np.float32)

    scale = args.scale  # how much to downscale by
    small = downsample(im, scale)

    if args.verbose:
        print('Downsample image:', path, file=sys.stderr)
        print('From', im.shape, 'to', small.shape, file=sys.stderr)

    # write the downsampled image
    with open(args.output, 'wb') as f:
        im = Image.fromarray(small)
        if small.dtype == np.uint8:
            im.save(f, 'png')
        else:
            im.save(f, 'tiff')
Example #8
def main(args):
    verbose = args.verbose

    ## set the device
    use_cuda = False
    if args.device >= 0:
        use_cuda = torch.cuda.is_available()
        torch.cuda.set_device(args.device)

    ## load the model
    model = torch.load(args.model)
    model.eval()
    model.fill()

    if use_cuda:
        model.cuda()

    ## make output directory if it doesn't exist
    destdir = args.destdir 
    if not os.path.exists(destdir):
        os.makedirs(destdir)

    ## load the images and process with the model
    for path in args.paths:
        basename = os.path.basename(path)
        image_name = os.path.splitext(basename)[0]
        image = load_image(path)

        ## process image with the model
        with torch.no_grad():
            X = torch.from_numpy(np.array(image, copy=False)).unsqueeze(0).unsqueeze(0)
            if use_cuda:
                X = X.cuda()
            score = model(X).data[0, 0].cpu().numpy()
        
        im = Image.fromarray(score) 
        path = os.path.join(destdir, image_name) + '.tiff'
        if verbose:
            print('# saving:', path)
        im.save(path, 'tiff')
Example #9
def main(args):
    dfs = []
    for path in args.paths:
        coords = pd.read_csv(path, sep='\t')
        dfs.append(coords)
    coords = pd.concat(dfs, axis=0)

    coords = coords.drop_duplicates()

    if not os.path.exists(args.destdir):
        os.makedirs(args.destdir)

    invert_y = args.invert_y

    for image_name, group in coords.groupby('image_name'):
        path = args.destdir + '/' + image_name + '.box'

        shape = None
        if invert_y:
            impath = os.path.join(args.imagedir,
                                  image_name) + '.' + args.image_ext
            # use glob in case image_ext is '*'
            impath = glob.glob(impath)[0]
            im = load_image(impath)
            shape = (im.height, im.width)

        xy = group[['x_coord', 'y_coord']].values.astype(np.int32)

        boxes = coordinates_to_boxes(xy,
                                     args.boxsize,
                                     args.boxsize,
                                     shape=shape,
                                     invert_y=invert_y)
        boxes = pd.DataFrame(boxes)

        boxes.to_csv(path, sep='\t', header=False, index=False)
Example #10
def stream_images(paths):
    for path in paths:
        image = load_image(path)
        image = np.array(image, copy=False)
        yield image
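Note: a self-contained sketch of the same lazy-streaming pattern, using plain PIL instead of topaz's load_image (the path pattern is hypothetical); only one image is held in memory at a time:

import glob
import numpy as np
from PIL import Image

def stream_arrays(pattern):
    # yield images one at a time instead of loading them all up front
    for path in sorted(glob.glob(pattern)):
        with Image.open(path) as im:
            yield np.asarray(im, dtype=np.float32)

for arr in stream_arrays('micrographs/*.tiff'):
    print(arr.shape, arr.dtype)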
Example #11
def main(args):

    ## set the device
    use_cuda = False
    if args.device >= 0:
        use_cuda = torch.cuda.is_available()
        if use_cuda:
            torch.cuda.set_device(args.device)
    print('# using device={} with cuda={}'.format(args.device, use_cuda),
          file=sys.stderr)

    do_train = (args.dir_a is not None
                and args.dir_b is not None) or (args.hdf is not None)
    if do_train:
        if args.hdf is None:  #use dirA/dirB
            crop = args.crop
            dir_a = args.dir_a
            dir_b = args.dir_b
            random = np.random.RandomState(44444)

            dataset_train, dataset_val = make_paired_images_datasets(
                dir_a, dir_b, crop, random=random)
            shuffle = True
        else:  # make HDF datasets
            dataset_train, dataset_val = make_hdf5_datasets(args.hdf)
            shuffle = False

        # initialize the model
        #model = dn.DenoiseNet(32)
        model = dn.UDenoiseNet()
        if use_cuda:
            model = model.cuda()

        # train
        lr = args.lr
        batch_size = args.batch_size
        num_epochs = args.num_epochs

        num_workers = args.num_workers

        print('epoch', 'loss_train', 'loss_val')
        #criteria = nn.L1Loss()
        criteria = args.criteria

        for epoch, loss_train, loss_val in dn.train_noise2noise(
                model,
                dataset_train,
                lr=lr,
                batch_size=batch_size,
                criteria=criteria,
                num_epochs=num_epochs,
                dataset_val=dataset_val,
                use_cuda=use_cuda,
                num_workers=num_workers,
                shuffle=shuffle):
            print(epoch, loss_train, loss_val)
            sys.stdout.flush()

            # save the model
            if args.save_prefix is not None:
                path = args.save_prefix + '_epoch{}.sav'.format(epoch)
                model.cpu()
                model.eval()
                torch.save(model, path)
                if use_cuda:
                    model.cuda()

    else:  # load the saved model
        if args.model in ['L0', 'L1', 'L2']:
            if args.model in ['L0', 'L1']:
                print(
                    'ERROR: L0 and L1 models are not implemented in the current version',
                    file=sys.stderr)
                sys.exit(1)
            model = dn.load_model(args.model)
        else:
            model = torch.load(args.model)
        print('# using model:', args.model, file=sys.stderr)
        model.eval()
        if use_cuda:
            model.cuda()

    if args.stack:
        # we are denoising a single MRC stack
        with open(args.micrographs[0], 'rb') as f:
            content = f.read()
        stack, _, _ = mrc.parse(content)
        print('# denoising stack with shape:', stack.shape, file=sys.stderr)

        denoised = dn.denoise_stack(model, stack, use_cuda=use_cuda)

        # write the denoised stack
        path = args.output
        print('# writing', path, file=sys.stderr)
        with open(path, 'wb') as f:
            mrc.write(f, denoised)

    else:
        # using trained model
        # stream the micrographs and denoise as we go

        normalize = args.normalize
        if args.format_ == 'png' or args.format_ == 'jpg':
            # always normalize png and jpg format
            normalize = True

        format_ = args.format_

        count = 0
        total = len(args.micrographs)

        bin_ = args.bin
        ps = args.patch_size
        padding = args.patch_padding

        # now, stream the micrographs and denoise them
        for path in args.micrographs:
            name, _ = os.path.splitext(os.path.basename(path))
            mic = np.array(load_image(path), copy=False)
            if bin_ > 1:
                mic = downsample(mic, bin_)
            mu = mic.mean()
            std = mic.std()

            # denoise
            mic = (mic - mu) / std
            mic = dn.denoise(model,
                             mic,
                             patch_size=ps,
                             padding=padding,
                             use_cuda=use_cuda)

            if normalize:
                mic = (mic - mic.mean()) / mic.std()
            else:
                # add back std. dev. and mean
                mic = std * mic + mu

            # write the micrograph
            outpath = args.output + os.sep + name + '.' + format_
            save_image(mic, outpath)

            count += 1
            print('# {} of {} completed.'.format(count, total),
                  file=sys.stderr,
                  end='\r')

        print('', file=sys.stderr)
Example #12
                    help="radius for overlapped points",
                    type=int)
parser.add_argument(
    '--assort_color',
    default=0,
    type=int,
    help="set to 1 if you want helical groups to be displayed in different colors")

## load the micrographs for visualization

args = parser.parse_args()

name = args.filename

im = np.array(load_image(name))

name = os.path.splitext(os.path.basename(name))[0]
prefix = args.prefix
suffix = args.suffix
binfactor = args.binfactor
directory = args.directory

ground_truth = os.path.splitext(args.ground_truth)[0]
assort_color = args.assort_color


def rebin(arr, new_shape):
    shape = (new_shape[0], arr.shape[0] // new_shape[0], new_shape[1],
             arr.shape[1] // new_shape[1])
    return arr.reshape(shape).mean(-1).mean(1)
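Note: a quick sanity check of rebin, assuming new_shape divides the original shape evenly (required for the reshape to succeed):

import numpy as np

arr = np.arange(16, dtype=np.float32).reshape(4, 4)
small = rebin(arr, (2, 2))  # average non-overlapping 2x2 blocks
print(small)
# [[ 2.5  4.5]
#  [10.5 12.5]]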
Example #13
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "micrographs",
        help=
        "The specified path for the micrograph you want to visualize, including its name"
    )
    parser.add_argument(
        "topaz_predictions",
        help=
        "The specified path to the topaz coordinate file you have generated from training"
    )
    parser.add_argument("--r",
                        '--radius',
                        help="The radius of particles",
                        const=15)
    parser.add_argument(
        "train_test_split_files",
        help="path to the train test split files generated from topaz")

    args = parser.parse_args()

    predicted_particles = pd.read_csv(args.topaz_predictions, sep='\t')

    _ = plt.hist(predicted_particles.score, bins=50)
    plt.xlabel('Predicted score (predicted log-likelihood ratio)')
    plt.ylabel('Number of particles')
    plt.show()

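    # scores are predicted log-likelihood ratios, so the logistic (sigmoid)
    # transform 1 / (1 + exp(-score)) maps them to probabilities in (0, 1)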
    proba = 1.0 / (1.0 + np.exp(-predicted_particles.score))
    _ = plt.hist(proba, bins=50)
    plt.xlabel('Predicted probability of y=1')
    plt.ylabel('Number of particles')
    plt.show()

    threshold = input("Enter a threshold value you would like to test: ")
    print("Number of particles above threshold: " +
          str(np.sum(predicted_particles.score >= float(threshold))))

    ## load the micrographs for visualization
    micrographs = {}
    for path in glob.glob(args.micrographs):
        im = np.array(load_image(path), copy=False)
        name, _ = os.path.splitext(os.path.basename(path))
        micrographs[name] = im

    ## load the train/test split so we can look at results on test set only!
    images_train = pd.read_csv(os.path.join(args.train_test_split_files,
                                            "image_list_train.txt"),
                               sep='\t')
    images_train = set(images_train.image_name)

    images_test = pd.read_csv(os.path.join(args.train_test_split_files,
                                           "image_list_test.txt"),
                              sep='\t')
    images_test = set(images_test.image_name)
    image_names = list(images_test)  # micrograph names for the test set

    proceed = True
    while proceed:
        name = input("Name of the file you would like to visualize: ")
        radius = input("Radius of particle size you would like to visualize: ")
        lowerbound = input("Lower bound of the threshold: ")
        upperbound = input("Upper bound of the threshold: ")
        visualizer(micrographs, predicted_particles, name, radius, lowerbound,
                   upperbound)

        response = input("Visualize more micrographs? (Y/N) ")
        proceed = response.lower() in ("y", "yes")
Example #14
parser.add_argument(
    "PATH", help="The specified path for the micrographs, including PATH")
parser.add_argument("PLOTS_PATH",
                    help="The specified path for the fitted plot coordinates")
parser.add_argument("--r", '--radius', help="The radius of particles")

parser.add_argument(
    "--s",
    "--SAVE_PATH",
    help="If specified, will save the file to the specified location")

args = parser.parse_args()

micrographs = {}
for path in glob.glob(args.PATH):
    im = np.array(load_image(path), copy=False)
    name, _ = os.path.splitext(os.path.basename(path))
    micrographs[name] = im

Continue = True
while Continue:

    name = input(
        "Enter the name of the microtubule file, do not include any file extensions (e.g. .mrc): "
    )

    print("visualization of filament fitting results for " + name)
    im = micrographs[name]

    # visualize predicted particles with log-likelihood ratio >= 0 (p >= 0.5)
Example #15
def main(args):
    if args.model is not None:  ## load images and segment them with the model
        ## set the device
        use_cuda = False
        if args.device >= 0:
            use_cuda = torch.cuda.is_available()
            torch.cuda.set_device(args.device)

        ## load the model
        model = torch.load(args.model)
        model.eval()
        model.fill()

        if use_cuda:
            model.cuda()

        ## load the images and process with the model
        scores = {}
        for path in args.paths:
            basename = os.path.basename(path)
            image_name = os.path.splitext(basename)[0]
            image = load_image(path)

            ## process image with the model
            with torch.no_grad():
                X = torch.from_numpy(np.array(
                    image, copy=False)).unsqueeze(0).unsqueeze(0)
                if use_cuda:
                    X = X.cuda()
                score = model(X).data[0, 0].cpu().numpy()

            scores[image_name] = score

    else:  # images are already segmented
        scores = {}
        for path in args.paths:
            basename = os.path.basename(path)
            image_name = os.path.splitext(basename)[0]
            image = load_image(path)
            scores[image_name] = np.array(image, copy=False)

    percentile = args.threshold * 100
    scores_concat = np.concatenate(
        [array.ravel() for array in scores.values()], 0)
    threshold = np.percentile(scores_concat, percentile)

    radius = args.radius
    if radius is None:
        radius = -1

    lo = args.min_radius
    hi = args.max_radius
    step = args.step_radius
    match_radius = args.assignment_radius

    num_workers = args.num_workers
    pool = None
    if num_workers > 0:
        from multiprocessing import Pool
        pool = Pool(num_workers)

    if radius < 0 and args.targets is not None:  # set the radius to optimize AUPRC of the targets
        targets = pd.read_csv(args.targets, sep='\t')
        target_scores = {
            name: scores[name]
            for name in targets.image_name.unique() if name in scores
        }
        ## find radius maximizing AUPRC
        radius, auprc = find_opt_radius(targets,
                                        target_scores,
                                        threshold,
                                        lo=lo,
                                        hi=hi,
                                        step=step,
                                        match_radius=match_radius,
                                        pool=pool)
    elif args.targets is not None:
        targets = pd.read_csv(args.targets, sep='\t')
        target_scores = {
            name: scores[name]
            for name in targets.image_name.unique() if name in scores
        }
        # calculate AUPRC for radius
        au, rmse, recall, n = extract_auprc(targets,
                                            target_scores,
                                            radius,
                                            threshold,
                                            match_radius=match_radius,
                                            pool=pool)
        print('# radius={}, auprc={}, rmse={}, recall={}, targets={}'.format(
            radius, au, rmse, recall, n))
    elif radius < 0:
        # must have targets if radius < 0
        raise Exception(
            'Must specify targets for choosing the extraction radius if extraction radius is not provided'
        )

    f = sys.stdout
    if args.output is not None:
        f = open(args.output, 'w')

    if not args.only_validate:
        print('image_name\tx_coord\ty_coord\tscore', file=f)
        ## extract coordinates using radius
        for name, score, coords in nms_iterator(scores,
                                                radius,
                                                threshold,
                                                pool=pool):
            for i in range(len(score)):
                print(name + '\t' + str(coords[i, 0]) + '\t' +
                      str(coords[i, 1]) + '\t' + str(score[i]),
                      file=f)
Example #16
def main(args):

    verbose = args.verbose

    form = args._from
    from_forms = [form for _ in range(len(args.files))]

    # detect the input file formats
    if form == 'auto':
        try:
            from_forms = [file_utils.detect_format(path) for path in args.files]
        except file_utils.UnknownFormatError as e:
            print('Error: unrecognized input coordinates file extension ('+e.ext+')', file=sys.stderr)
            sys.exit(1)
    formats_detected = list(set(from_forms))
    if verbose > 0:
        print('# INPUT formats detected: '+str(formats_detected), file=sys.stderr)

    # determine the output file format
    output_path = args.output
    output = None
    to_form = args.to
    if output_path is None:
        output = sys.stdout
        # if output is to stdout and form is not set
        # then raise an error
        if to_form == 'auto':
            if len(formats_detected) == 1:
                # write the same output format as input format
                to_form = from_forms[0]
            else:
                print('Error: writing file to stdout and multiple input formats present with no output format (--to) set! Please tell me what format to write!', file=sys.stderr)
                sys.exit(1)
        if to_form == 'box' or to_form == 'json':
            print('Error: writing BOX or JSON output files requires a destination directory. Please set the --output parameter!', file=sys.stderr)
            sys.exit(1)

    image_ext = args.image_ext
    boxsize = args.boxsize
    if to_form == 'auto':
        # first check for directory
        if output_path[-1] == '/':
            # image-ext must be set for these file formats
            if image_ext is None:
                print('Error: writing BOX or JSON output files requires setting the image file extension!', file=sys.stderr)
                sys.exit(1)
            # format is either json or box, check for boxsize to decide
            if boxsize > 0:
                # write boxes!
                if verbose > 0:
                    print('# Detected output format is BOX, because OUTPUT is a directory and boxsize > 0.', file=sys.stderr)
                to_form = 'box'
            else:
                if verbose > 0:
                    print('# Detected output format is JSON, because OUTPUT is a directory and no boxsize set.', file=sys.stderr)
                to_form = 'json'
        else:
            try:
                to_form = file_utils.detect_format(output_path)
            except file_utils.UnknownFormatError as e:
                print('Error: unrecognized output coordinates file extension ('+e.ext+')', file=sys.stderr)
                sys.exit(1)
    if verbose > 0:
        print('# OUTPUT format: ' + to_form, file=sys.stderr)

    suffix = args.suffix

    t = args.threshold
    down_scale = args.down_scale
    up_scale = args.up_scale
    scale = up_scale/down_scale

    # special case when inputs and outputs are all star files
    if len(formats_detected) == 1 and formats_detected[0] == 'star' and to_form == 'star':
        dfs = []
        for path in args.files:
            with open(path, 'r') as f:
                table = star.parse(f)
            dfs.append(table)
        table = pd.concat(dfs, axis=0)
        # apply the score threshold if a score column is present
        if star.SCORE_COLUMN_NAME in table.columns:
            table = table.loc[table[star.SCORE_COLUMN_NAME] >= t]
        # scale coordinates
        if scale != 1:
            x_coord = table[star.X_COLUMN_NAME].values
            x_coord = np.round(scale*x_coord).astype(int)
            table[star.X_COLUMN_NAME] = x_coord
            y_coord = table[star.Y_COLUMN_NAME].values
            y_coord = np.round(scale*y_coord).astype(int)
            table[star.Y_COLUMN_NAME] = y_coord
        # add metadata if specified
        if args.voltage > 0:
            table[star.VOLTAGE] = args.voltage
        if args.detector_pixel_size > 0:
            table[star.DETECTOR_PIXEL_SIZE] = args.detector_pixel_size
        if args.magnification > 0:
            table[star.MAGNIFICATION] = args.magnification
        if args.amplitude_contrast > 0:
            table[star.AMPLITUDE_CONTRAST] = args.amplitude_contrast
        # write output file
        if output is None:
            with open(output_path, 'w') as f:
                star.write(table, f)
        else:
            star.write(table, output)
    

    else: # general case

        # read the input files
        dfs = []
        for i in range(len(args.files)):
            path = args.files[i]
            coords = file_utils.read_coordinates(path, format=from_forms[i])
            dfs.append(coords)
        coords = pd.concat(dfs, axis=0)

        # threshold particles by score (if there is a score)
        if 'score' in coords.columns:
            coords = coords.loc[coords['score'] >= t]

        # scale coordinates
        if scale != 1:
            x_coord = coords['x_coord'].values
            x_coord = np.round(scale*x_coord).astype(int)
            coords['x_coord'] = x_coord
            y_coord = coords['y_coord'].values
            y_coord = np.round(scale*y_coord).astype(int)
            coords['y_coord'] = y_coord

        # add metadata if specified
        if args.voltage > 0:
            coords['voltage'] = args.voltage
        if args.detector_pixel_size > 0:
            coords['detector_pixel_size'] = args.detector_pixel_size
        if args.magnification > 0:
            coords['magnification'] = args.magnification
        if args.amplitude_contrast > 0:
            coords['amplitude_contrast'] = args.amplitude_contrast

        # invert y-axis coordinates if specified
        invert_y = args.invert_y
        if invert_y:
            if args.imagedir is None:
                print('Error: --imagedir must specify the directory of images in order to mirror the y-axis coordinates', file=sys.stderr)
                sys.exit(1)
            dfs = []
            for image_name,group in coords.groupby('image_name'):
                impath = os.path.join(args.imagedir, image_name) + '.' + args.image_ext
                # use glob in case image_ext is '*'
                impath = glob.glob(impath)[0]
                im = load_image(impath)
                height = im.height

                group = mirror_y_axis(group, height)
                dfs.append(group)
            coords = pd.concat(dfs, axis=0)

        # output file format is decided and coordinates are processed, now write files
        if output is None and to_form != 'box' and to_form != 'json':
            output = open(output_path, 'w')
        if to_form == 'box' or to_form == 'json':
            output = output_path

        file_utils.write_coordinates(output, coords, format=to_form, boxsize=boxsize, image_ext=image_ext, suffix=suffix)
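Note: the y-axis mirroring step above maps coordinates between top-left-origin and bottom-left-origin conventions. A minimal sketch of the arithmetic (the exact off-by-one convention used by mirror_y_axis is an assumption here):

import numpy as np

height = 4096                 # hypothetical micrograph height in pixels
y = np.array([0, 100, 4095])
y_flipped = (height - 1) - y  # mirror about the horizontal mid-line
print(y_flipped)              # [4095 3995    0]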
Example #17
def load_images(paths):
    for path in paths:
        name = os.path.splitext(os.path.basename(path))[0]
        yield name, load_image(path)
Example #18
def load_im(image):
    im = np.array(load_image(image))
    return im
Example #19
def main(args):

    # set the number of threads
    num_threads = args.num_threads
    from topaz.torch import set_num_threads
    set_num_threads(num_threads)

    ## set the device
    use_cuda = topaz.cuda.set_device(args.device)
    print('# using device={} with cuda={}'.format(args.device, use_cuda),
          file=sys.stderr)

    cutoff = args.pixel_cutoff  # pixel truncation limit

    do_train = (args.dir_a is not None
                and args.dir_b is not None) or (args.hdf is not None)
    if do_train:

        method = args.method
        paired = (method == 'noise2noise')
        preload = args.preload
        holdout = args.holdout  # fraction of image pairs to holdout for validation

        if args.hdf is None:  #use dirA/dirB
            crop = args.crop
            dir_as = args.dir_a
            dir_bs = args.dir_b

            dset_train = []
            dset_val = []

            for dir_a, dir_b in zip(dir_as, dir_bs):
                random = np.random.RandomState(44444)
                if paired:
                    dataset_train, dataset_val = make_paired_images_datasets(
                        dir_a,
                        dir_b,
                        crop,
                        random=random,
                        holdout=holdout,
                        preload=preload,
                        cutoff=cutoff)
                else:
                    dataset_train, dataset_val = make_images_datasets(
                        dir_a,
                        dir_b,
                        crop,
                        cutoff=cutoff,
                        random=random,
                        holdout=holdout)
                dset_train.append(dataset_train)
                dset_val.append(dataset_val)

            dataset_train = dset_train[0]
            for i in range(1, len(dset_train)):
                dataset_train.x += dset_train[i].x
                if paired:
                    dataset_train.y += dset_train[i].y

            dataset_val = dset_val[0]
            for i in range(1, len(dset_val)):
                dataset_val.x += dset_val[i].x
                if paired:
                    dataset_val.y += dset_val[i].y

            shuffle = True
        else:  # make HDF datasets
            dataset_train, dataset_val = make_hdf5_datasets(args.hdf,
                                                            paired=paired,
                                                            cutoff=cutoff,
                                                            holdout=holdout,
                                                            preload=preload)
            shuffle = preload

        # initialize the model
        arch = args.arch
        if arch == 'unet':
            model = dn.UDenoiseNet()
        elif arch == 'unet-small':
            model = dn.UDenoiseNetSmall()
        elif arch == 'unet2':
            model = dn.UDenoiseNet2()
        elif arch == 'unet3':
            model = dn.UDenoiseNet3()
        elif arch == 'fcnet':
            model = dn.DenoiseNet(32)
        elif arch == 'fcnet2':
            model = dn.DenoiseNet2(64)
        elif arch == 'affine':
            model = dn.AffineDenoise()
        else:
            raise Exception('Unknown architecture: ' + arch)

        if use_cuda:
            model = model.cuda()

        # train
        optim = args.optim
        lr = args.lr
        batch_size = args.batch_size
        num_epochs = args.num_epochs
        digits = int(np.ceil(np.log10(num_epochs)))

        num_workers = args.num_workers

        print('epoch', 'loss_train', 'loss_val')
        #criteria = nn.L1Loss()
        criteria = args.criteria

        if method == 'noise2noise':
            iterator = dn.train_noise2noise(model,
                                            dataset_train,
                                            lr=lr,
                                            optim=optim,
                                            batch_size=batch_size,
                                            criteria=criteria,
                                            num_epochs=num_epochs,
                                            dataset_val=dataset_val,
                                            use_cuda=use_cuda,
                                            num_workers=num_workers,
                                            shuffle=shuffle)
        elif method == 'masked':
            iterator = dn.train_mask_denoise(model,
                                             dataset_train,
                                             lr=lr,
                                             optim=optim,
                                             batch_size=batch_size,
                                             criteria=criteria,
                                             num_epochs=num_epochs,
                                             dataset_val=dataset_val,
                                             use_cuda=use_cuda,
                                             num_workers=num_workers,
                                             shuffle=shuffle)
        else:
            raise ValueError('unknown training method: ' + method)

        for epoch, loss_train, loss_val in iterator:
            print(epoch, loss_train, loss_val)
            sys.stdout.flush()

            # save the model
            if args.save_prefix is not None:
                path = args.save_prefix + ('_epoch{:0' + str(digits) +
                                           '}.sav').format(epoch)
                #path = args.save_prefix + '_epoch{}.sav'.format(epoch)
                model.cpu()
                model.eval()
                torch.save(model, path)
                if use_cuda:
                    model.cuda()

        models = [model]

    else:  # load the saved model(s)
        models = []
        for arg in args.model:
            if arg == 'none':
                print('# Warning: no denoising model will be used',
                      file=sys.stderr)
            else:
                print('# Loading model:', arg, file=sys.stderr)
            model = dn.load_model(arg)

            model.eval()
            if use_cuda:
                model.cuda()

            models.append(model)

    # using trained model
    # denoise the images

    normalize = args.normalize
    if args.format_ == 'png' or args.format_ == 'jpg':
        # always normalize png and jpg format
        normalize = True

    format_ = args.format_
    suffix = args.suffix

    lowpass = args.lowpass
    gaus = args.gaussian
    if gaus > 0:
        gaus = dn.GaussianDenoise(gaus)
        if use_cuda:
            gaus.cuda()
    else:
        gaus = None
    inv_gaus = args.inv_gaussian
    if inv_gaus > 0:
        inv_gaus = dn.InvGaussianFilter(inv_gaus)
        if use_cuda:
            inv_gaus.cuda()
    else:
        inv_gaus = None
    deconvolve = args.deconvolve
    deconv_patch = args.deconv_patch

    ps = args.patch_size
    padding = args.patch_padding

    count = 0

    # we are denoising a single MRC stack
    if args.stack:
        with open(args.micrographs[0], 'rb') as f:
            content = f.read()
        stack, _, _ = mrc.parse(content)
        print('# denoising stack with shape:', stack.shape, file=sys.stderr)
        total = len(stack)

        denoised = np.zeros_like(stack)
        for i in range(len(stack)):
            mic = stack[i]
            # process and denoise the micrograph
            mic = denoise_image(mic,
                                models,
                                lowpass=lowpass,
                                cutoff=cutoff,
                                gaus=gaus,
                                inv_gaus=inv_gaus,
                                deconvolve=deconvolve,
                                deconv_patch=deconv_patch,
                                patch_size=ps,
                                padding=padding,
                                normalize=normalize,
                                use_cuda=use_cuda)
            denoised[i] = mic

            count += 1
            print('# {} of {} completed.'.format(count, total),
                  file=sys.stderr,
                  end='\r')

        print('', file=sys.stderr)
        # write the denoised stack
        path = args.output
        print('# writing', path, file=sys.stderr)
        with open(path, 'wb') as f:
            mrc.write(f, denoised)

    else:
        # stream the micrographs and denoise them
        total = len(args.micrographs)

        # make the output directory if it doesn't exist
        if args.output and not os.path.exists(args.output):
            os.makedirs(args.output)

        for path in args.micrographs:
            name, _ = os.path.splitext(os.path.basename(path))
            mic = np.array(load_image(path), copy=False).astype(np.float32)

            # process and denoise the micrograph
            mic = denoise_image(mic,
                                models,
                                lowpass=lowpass,
                                cutoff=cutoff,
                                gaus=gaus,
                                inv_gaus=inv_gaus,
                                deconvolve=deconvolve,
                                deconv_patch=deconv_patch,
                                patch_size=ps,
                                padding=padding,
                                normalize=normalize,
                                use_cuda=use_cuda)

            # write the micrograph
            if not args.output:
                if suffix == '' or suffix is None:
                    suffix = '.denoised'
                # write the file to the same location as input
                no_ext, ext = os.path.splitext(path)
                outpath = no_ext + suffix + '.' + format_
            else:
                outpath = args.output + os.sep + name + suffix + '.' + format_
            save_image(mic, outpath)  #, mi=None, ma=None)

            count += 1
            print('# {} of {} completed.'.format(count, total),
                  file=sys.stderr,
                  end='\r')
        print('', file=sys.stderr)
Example #20
 def load_image(self, path):
     x = np.array(load_image(path), copy=False)
     mu = x.mean()
     std = x.std()
     x = (x - mu)/std
     return x