def downsample_stack_file(cls, stack_file, side, output_stack_file=None, mask_file=None):
    """ Downsample the projections of a stack file and save the result as a new MRC file.

        :param stack_file: path of the stack file to downsample.
        :param side: target side, handed to ``cls.downsample``.
        :param output_stack_file: path of the file to create. When None, the name is
            derived with ``set_output_name(stack_file, 'downsampled')``.
        :param mask_file: optional path of a stack file that is loaded and handed to
            ``cls.downsample`` as the mask.
        :raises FileExistsError: if the output file already exists.
        :raises FileNotFoundError: if mask_file is given but doesn't exist.
    """
    if output_stack_file is None:
        output_stack_file = set_output_name(stack_file, 'downsampled')

    # never overwrite an existing file
    if os.path.exists(output_stack_file):
        raise FileExistsError(
            f"output file '{yellow(output_stack_file)}' already exists!")

    mask = None
    if mask_file:
        # fail here with a clear error instead of logging and then trying to
        # load a file that is known to be missing
        if not os.path.exists(mask_file):
            raise FileNotFoundError(f"mask file {yellow(mask_file)} doesn't exist!")
        mask = load_stack_from_file(mask_file)

    stack = load_stack_from_file(stack_file)
    downsampled_stack = cls.downsample(stack, side, compute_fx=False, stack=True, mask=mask)
    logger.info(
        f"downsampled stack from size {stack.shape} to {downsampled_stack.shape}."
        f" saving to {yellow(output_stack_file)}..")

    with mrcfile.new(output_stack_file) as mrc_fh:
        mrc_fh.set_data(downsampled_stack)
    logger.debug(f"saved to {output_stack_file}")
def star_phaseflip_cmd(stack_file, output=None):
    """ \b
        ############################
            Prewhitten Stack
        ############################

        Prewhitten projections in stack file.

        \b
        Example:
        $ python aspire.py prewhitten projections.mrc
        will produce file projections_prewhitten.mrc

    """
    # NOTE(review): the name says "star_phaseflip" but the body prewhitens a stack - confirm.
    # derive the default output name when the caller didn't choose one
    output = set_output_name(stack_file, 'prewhitten') if output is None else output

    # refuse to overwrite an existing file
    already_there = os.path.exists(output)
    if already_there:
        logger.error(f"output file {yellow(output)} already exsits! "
                     f"remove first or use another name with '-o NAME' flag")
        return

    logger.info("prewhittening projections..")
    prewhiten = PreProcessor.prewhiten_stack_file
    prewhiten(stack_file, output=output)
    logger.info(f"saved to {yellow(output)}.")
def crop_stack_file(cls, stack_file, size, output_stack_file=None, fill_value=None):
    """ Crop (or pad) the projections of a stack file and save the result as a new MRC file.

        :param stack_file: path of the stack file to resize.
        :param size: target size, handed to ``cls.crop_stack``. The log message says
            'cropped' when size is smaller than ``stack.shape[1]`` and 'padded' otherwise.
        :param output_stack_file: path of the file to create. When None, the name is
            derived with ``set_output_name(stack_file, 'cropped')``.
        :param fill_value: fill value handed to ``cls.crop_stack``. When None,
            ``PreProcessorConfig.crop_stack_fill_value`` is used.
        :raises FileExistsError: if the output file already exists.
    """
    if output_stack_file is None:
        output_stack_file = set_output_name(stack_file, 'cropped')

    # never overwrite an existing file
    if os.path.exists(output_stack_file):
        raise FileExistsError(
            f"output file '{yellow(output_stack_file)}' already exists!")

    stack = load_stack_from_file(stack_file)

    # only fall back on the configured value when none was given; a plain `or`
    # would also throw away an explicit fill value of 0
    if fill_value is None:
        fill_value = PreProcessorConfig.crop_stack_fill_value

    cropped_stack = cls.crop_stack(stack, size, fill_value=fill_value)

    action = 'cropped' if size < stack.shape[1] else 'padded'
    logger.info(
        f"{action} stack from size {stack.shape} to size {cropped_stack.shape}."
        f" saving to {yellow(output_stack_file)}..")

    with mrcfile.new(output_stack_file) as mrc:
        mrc.set_data(cropped_stack)
    logger.debug(f"saved to {output_stack_file}")
def normalize_cmd(stack_file, output=None):
    """ \b
        ############################
            Normalize Stack
        ############################

        Normalize stack of projections.

        \b
        Example:
        $ python aspire.py normalize projections.mrc
        will produce file projections_normalized.mrc

    """
    # derive the default output name when the caller didn't choose one
    output = set_output_name(stack_file, 'normalized') if output is None else output

    # refuse to overwrite an existing file
    if os.path.exists(output):
        logger.error(f"output file {yellow(output)} already exsits! "
                     f"remove first or use another name with '-o NAME' flag")
        return

    logger.info("normalizing projections..")
    raw_stack = load_stack_from_file(stack_file, c_contiguous=True)

    # the background normalization is fed double precision data
    double_stack = raw_stack.astype('float64')
    normalized_stack = PreProcessor.normalize_background(double_stack)

    # the result is stored in single precision
    with mrcfile.new(output) as fh:
        fh.set_data(normalized_stack.astype('float32'))

    logger.info(f"saved to {yellow(output)}.")
def abinitio_cmd(stack_file, output):
    """\b
        ############################
                 Abinitio
        ############################

        Abinitio accepts a stack file, calculates Abinitio algorithm on it and saves
        the results into output file (default adds '_abinitio' to stack name)
    """
    # derive the default output name when the caller didn't choose one
    output = set_output_name(stack_file, 'abinitio') if output is None else output

    # refuse to overwrite an existing file
    if os.path.exists(output):
        logger.error(f"file {yellow(output)} already exsits! remove first "
                     "or use another name with '-o NAME'")
        return

    input_stack = load_stack_from_file(stack_file)

    logger.info(f'running abinitio on stack file {stack_file}..')
    run_abinitio = Abinitio.cryo_abinitio_c1_worker
    result_stack = run_abinitio(input_stack)

    # the result is stored in single precision
    with mrcfile.new(output) as mrc_fh:
        mrc_fh.set_data(result_stack.astype('float32'))
    logger.info(f"saved to {yellow(output)}.")
def chained_save_stack(ctx_obj, o):
    """ Save MRC stack to output file """
    # ctx_obj: object whose `stack` attribute holds the data to save
    # o: path of the MRC file to create; the process exits with status 1 if it exists
    if os.path.exists(o):  # TODO move this check before anything starts running
        # the placeholder was never filled in, so the message didn't name the file
        logger.error("output file {} already exists! "
                     "please rename/delete or use flag -o with different output name".format(o))
        sys.exit(1)

    logger.info("saving stack {}..".format(o))
    # the context manager closes the file handle once the data is set
    with mrcfile.new(o) as mrc_fh:
        mrc_fh.set_data(ctx_obj.stack)
def global_phaseflip_cmd(stack_file, output):
    """ \b
        ############################
            Global Phaseflip
        ############################

        Apply global phase-flip to a stack file
    """
    logger.info("calculating global-phaseflip..")

    # all the work (including choosing a default output name) is delegated
    flip_stack_file = PreProcessor.global_phaseflip_stack_file
    flip_stack_file(stack_file, output_stack_file=output)
def global_phaseflip_stack(stack):
    """ Apply global phase flip to an image stack if needed.

    Check if all images in a stack should be globally phase flipped so that
    the molecule corresponds to brighter pixels and the background corresponds
    to darker pixels. This is done by comparing the mean in a small circle
    around the origin (supposed to correspond to the molecule) with the mean
    of the noise, and making sure that the mean of the molecule is larger.

    Images are indexed along the first axis, i.e. a 3D stack is expected to
    have the shape (num_of_images, side, side). A 2D input is treated as a
    single image.

    Examples:
        >> import mrcfile
        >> stack = mrcfile.open('stack.mrcs')
        >> stack = global_phaseflip_stack(stack)

    :param stack: stack of images to phaseflip if needed
    :return stack: the input stack itself when no flip is needed, ``-stack`` otherwise
    :raises Exception: if the stack isn't 2 or 3 dimensional, or its images aren't square
    """
    num_dims = len(stack.shape)
    if num_dims not in (2, 3):
        raise Exception('illegal stack size/shape! stack should be either 2 or 3 dimensional. '
                        '(stack shape:{})'.format(stack.shape))

    # a 2D input is a single image; a 3D input is iterated along its first axis
    images = stack if num_dims == 3 else [stack]
    num_of_images = len(images)

    # make sure images are square (the last two axes are the image axes)
    rows, cols = stack.shape[-2:]
    if rows != cols:
        raise Exception(f'images must be square! ({rows}, {cols})')

    # nothing to measure in an empty stack
    if num_of_images == 0:
        logger.info('no need to phase-flip stack.')
        return stack

    image_side_length = rows
    # pixel coordinates below are 0-based, so the center sits at (n - 1) / 2
    image_center = (image_side_length - 1) / 2
    coor_mat_m, coor_mat_n = meshgrid(range(image_side_length), range(image_side_length))
    distance_from_center = sqrt((coor_mat_m - image_center)**2 + (coor_mat_n - image_center)**2)

    # calculate masks of signal and noise samples assuming molecule is around the center.
    # the masks must stay boolean: an integer 0/1 array would be used as row indices
    signal_mask = distance_from_center < round(image_side_length / 4)
    noise_mask = distance_from_center > round(image_side_length / 2 * 0.8)

    signal_means = zeros(num_of_images)
    noise_means = zeros(num_of_images)
    for image_idx, proj in enumerate(images):
        signal_means[image_idx] = mean(proj[signal_mask])
        noise_means[image_idx] = mean(proj[noise_mask])

    # the decision is taken once for the whole stack
    if mean(signal_means) < mean(noise_means):
        logger.info('phase-flipping stack..')
        return -stack

    logger.info('no need to phase-flip stack.')
    return stack
def viewstack_cmd(stack_file, numslices, startslice, nrows, ncols):
    """ \b
        ############################
                View Stack
        ############################

        Plot projections using GUI.
    """
    logger.info(f"viewing stack {stack_file}..")
    loaded_stack = load_stack_from_file(stack_file)

    # the layout options are forwarded to the viewer untouched
    view_options = dict(numslices=numslices, startslice=startslice,
                        nrows=nrows, ncols=ncols)
    view_stack(loaded_stack, **view_options)
def crop_cmd(stack_file, size, output, fill_value):
    """ \b
        ############################
                Crop Stack
        ############################

        Crop projections in stack to squares of 'size x size' px.
        Then save the cropped stack into a new MRC file.
        In case size is bigger than original stack, padding will apply.
        When padding, `--fill-value=VAL` will be used for the padded values.
    """
    logger.info(f"resizing projections in {stack_file} to {size}x{size}..")

    # loading, cropping/padding and saving are all delegated
    crop_file = PreProcessor.crop_stack_file
    crop_file(stack_file, size, output_stack_file=output, fill_value=fill_value)
def compare_cmd(stack_file_1, stack_file_2, max_error):
    """ \b
        ############################
              Compare stacks
        ############################

        Calculate the relative error between 2 stack files.
        Stack files can be in MRC/MRCS, NPY or MAT formats.
    """
    logger.info(f"calculating relative err between '{stack_file_1}' and '{stack_file_2}'..")

    # the comparison runs with the globally configured verbosity
    comparison_options = dict(verbose=AspireConfig.verbosity, max_error=max_error)
    relative_err = compare_stack_files(stack_file_1, stack_file_2, **comparison_options)

    logger.info(f"relative err: {relative_err}")
def inspect_cmd(stack_file):
    """ \b
        ############################
              Inspect Stack
        ############################

        Print info about projections in stack file.
    """
    # the stack is loaded as-is (no conversion to C-contiguous indexing) so the
    # report reflects what is actually stored in the file
    stack, stack_type = load_stack_from_file(stack_file, c_contiguous=False,
                                             return_format=True)

    # only the F-contiguous case is highlighted in red
    if stack.flags.f_contiguous:
        contiguous = red("F-Contiguous")
    else:
        contiguous = "C-Contiguous"

    logger.info(f"\nStack shape: {yellow(stack.shape)}"
                f"\nStack format: {yellow(stack_type)}"
                f"\nContiguous type: {contiguous}")
def downsample_cmd(stack_file, side, output, mask):
    """ \b
        ############################
            Downsample Stack
        ############################

        Use Fourier methods to change the sample interval and/or aspect ratio
        of any dimensions of the input projections-stack to the output of SIZE x SIZE.
        If the optional mask argument is given, this is used as the
        zero-centered Fourier mask for the re-sampling. The size of mask should
        be the same as the output image size.
    """
    logger.info(f"downsampling stack {stack_file} to size {side}x{side} px..")

    # loading, downsampling and saving are all delegated
    downsample_file = PreProcessor.downsample_stack_file
    downsample_file(stack_file, side, output_stack_file=output, mask_file=mask)
def global_phaseflip_stack_file(cls, stack_file, output_stack_file=None):
    """ Globally phase-flip a stack file and save the result, but only if it was flipped.

        :param stack_file: path of the stack file to process.
        :param output_stack_file: path of the file to create. When None, the name is
            derived with ``set_output_name(stack_file, 'g-pf')``.
        :raises FileExistsError: if the output file already exists.
    """
    if output_stack_file is None:
        output_stack_file = set_output_name(stack_file, 'g-pf')

    # never overwrite an existing file
    output_taken = os.path.exists(output_stack_file)
    if output_taken:
        raise FileExistsError(f"output file '{yellow(output_stack_file)}' already exists!")

    in_stack = load_stack_from_file(stack_file)
    out_stack = cls.global_phaseflip_stack(in_stack)

    # the stack counts as untouched when its first entry equals the input's first entry
    first_entry_unchanged = (out_stack[0] == in_stack[0]).all()
    if first_entry_unchanged:
        logger.info('not saving new mrc file.')
        return

    with mrcfile.new(output_stack_file) as mrc:
        mrc.set_data(out_stack)
    logger.info(f"stack is flipped and saved as {yellow(output_stack_file)}")
def star_phaseflip_cmd(star_file, output=None):
    """ \b
        ############################
          Phaseflip (STAR file)
        ############################

        \b
        Apply phase-flip to projections in multiple mrc files having a
        STAR file pointing at them.
        After phaseflipping them, they will all be saved in 1 MRC file.
        Default output will add '_phaseflipped.mrc' to star file basename

        \b
        Example:
        ./aspire.py phaseflip ../my_projections/set.star
        will produce file set_phaseflipped.mrc

    """
    star_suffix = '.star'
    if not star_file.endswith(star_suffix):
        logger.error("input file name doesn't end with '.star'!")
        return

    if output is None:
        # convert 'path/to/foo.star' -> 'foo_phaseflipped.mrc'
        # (the name is known to end with '.star', so only that suffix is replaced)
        renamed = star_file[:-len(star_suffix)] + '_phaseflipped.mrc'
        output = os.path.basename(renamed)

    # refuse to overwrite an existing file
    if os.path.exists(output):
        logger.error(f"output file {yellow(output)} already exists! "
                     "Use flag '-o OUTPUT' or remove file.")
        return

    logger.info("phaseflipping projections..")
    flipped_stack = PreProcessor.phaseflip_star_file(star_file)

    # the result is stored in single precision
    with mrcfile.new(output) as fh:
        fh.set_data(flipped_stack.astype('float32'))

    logger.info(f"saved {yellow(output)}.")
def classify_cmd(stack_file, output, avg_nn, classification_nn):
    """ \b
        ############################
          Classification-Averaging
        ############################

        This command accepts a stack file and calculates the
        classification averaging algorithm.

        \b
        When it's done, it saves 2 files:
            1) The full classified stack
            2) A subset of the classified stack (for faster calculations)

        \b
        Example:
            input - stack.mrc
            output1 - stack_classified.mrc (or use flag -o to override)
            output2 - stack_classified_subset.mrc
    """
    # derive the default output name when the caller didn't choose one
    output = set_output_name(stack_file, 'classified') if output is None else output

    # neither of the two result files may exist beforehand
    if os.path.exists(output):
        logger.error(f"output file {yellow(output)} already exsits! "
                     f"remove first or use another name with '-o NAME' flag")
        return

    subset_output_name = set_output_name(output, 'subset')
    if os.path.exists(subset_output_name):
        logger.error(
            f"subset file {yellow(subset_output_name)} already exsits! "
            f"remove first or use another name with '-o NAME' flag")
        return

    logger.info(f'class-averaging {stack_file}..')
    ClassAverages.run(stack_file, output, n_nbor=classification_nn, nn_avg=avg_nn)

    # report both files named above
    for saved_file in (output, subset_output_name):
        logger.info(f"saved to {yellow(saved_file)}.")
def compare_stacks(stack1, stack2, verbose=None, max_error=None):
    """ Calculate the difference between two projection-stacks.

        For every pair of images the error is norm(img1 - img2) / norm(img1);
        the returned value is the average of these errors.

        :param stack1: first stack to compare (images indexed along the first axis)
        :param stack2: second stack to compare, must have the same shape as stack1
        :param verbose: level of verbosity (defaults to AspireConfig.verbosity)
               verbose=0   silent
               verbose=1   show progress bar
               verbose=2   print progress every 100 images
               verbose=3   print message for each processed image
        :param max_error: when given, raise an exception as soon as the running
               average of the errors exceeds it
        :return: the average relative error between the two stacks (0 for empty stacks)
        :raises WrongInput: if max_error can't be converted to a number
        :raises DimensionsIncompatible: if the stacks differ in shape
        :raises ErrorTooBig: if the running average error exceeds max_error
    """
    if max_error is not None:
        try:
            max_error = np.longdouble(max_error)
        except (TypeError, ValueError):
            raise WrongInput("max_error must be either a float or an integer!")

    if verbose is None:
        verbose = AspireConfig.verbosity

    # check the dimensions of the stack are compatible
    if stack1.shape != stack2.shape:
        raise DimensionsIncompatible("Can't compare stacks of different sizes!"
                                     f" {stack1.shape} != {stack2.shape}")

    num_of_images = stack1.shape[0]
    if num_of_images == 0:
        logger.warning('stacks are empty!')

    if verbose == 1:
        pb = ProgressBar(total=100, prefix='comparing:', suffix='completed',
                         decimals=0, length=100, fill='%')

    # max_error may be tiny, so test for None instead of relying on truthiness
    has_limit = max_error is not None

    relative_err = 0
    accumulated_err = 0
    for i in range(num_of_images):
        processed = i + 1
        first, second = stack1[i], stack2[i]

        err = np.linalg.norm(first - second) / np.linalg.norm(first)
        accumulated_err += err
        relative_err = accumulated_err / processed

        # once the running error is already too big there is no point in going on
        if has_limit and relative_err > max_error:
            raise ErrorTooBig(
                'Stacks comparison failed! error is too big: {}'.format(relative_err))

        # progress reporting, according to the verbosity level
        if verbose == 1:
            pb.print_progress_bar(processed / num_of_images * 100)
        elif verbose == 2:
            if processed % 100 == 0:
                logger.info(
                    f'Finished comparing {processed}/{num_of_images} projections. '
                    f'Relative error so far: {relative_err}')
        elif verbose == 3:
            logger.info(
                f'Difference between projections ({processed}) <> ({processed}): {err}')

    if verbose == 2:
        logger.info(
            f'Finished comparing {num_of_images}/{num_of_images} projections. '
            f'Relative error: {relative_err}')

    return relative_err
def phaseflip_star_file(cls, star_file, pixel_size=None):
    """ Phase-flip all the projections a STAR file points at.

        Every STAR record names a projection as '<index>@<mrc file>', where the index
        is 1-based and the MRC file is looked up relative to the STAR file's directory.

        :param star_file: path of the STAR file.
        :param pixel_size: pixel size. When None, it is taken from the STAR record
            (``tmppixA``), and an error is raised if the record doesn't hold one.
        :return: float32 array of shape (num_projections, side, side) holding the
            phase-flipped projections (an empty array when the STAR file has no records).
        :raises DimensionsIncompatible: if a record points outside of its stack.
        :raises WrongInput: if no pixel size is given and the STAR file has none.
    """
    # TODO add verbosity
    # star is a list of star lines describing projections
    star_records = read_star(star_file)['__root__']
    num_projections = len(star_records)
    star_dir = os.path.dirname(star_file)

    # allocated on the first projection, once the side length is known
    projections = None
    stack = None
    last_processed_stack = None

    for idx in range(num_projections):
        star_record = star_records[idx]

        # Get the identification string of the next image to process.
        # This is composed from the index of the image within an image stack,
        # followed by '@' and followed by the filename of the MRC stack.
        image_parts = star_record.rlnImageName.split('@')
        image_idx = int(image_parts[0]) - 1
        stack_name = image_parts[1]

        # Read the image stack from the disk, if different from the current one.
        if stack_name != last_processed_stack:
            mrc_path = os.path.join(star_dir, stack_name)
            stack = load_stack_from_file(mrc_path)
            logger.info(
                f"flipping stack in {yellow(os.path.basename(mrc_path))}"
                f" - {stack.shape}")
            last_processed_stack = stack_name

        # projections are indexed along the first axis, so that is the axis to
        # validate against (and an index equal to its length is already out of range)
        if not 0 <= image_idx < stack.shape[0]:
            raise DimensionsIncompatible(
                f'projection {image_idx} in '
                f'stack {stack_name} does not exist')

        proj = stack[image_idx]
        validate_square_projections(proj)
        side = proj.shape[1]

        if projections is None:
            projections = np.zeros((num_projections, side, side), dtype='float32')

        star_record_data = Box(cryo_parse_Relion_CTF_struct(star_record))

        if pixel_size is None:
            if star_record_data.tmppixA != -1:
                pixel_size = star_record_data.tmppixA
            else:
                raise WrongInput(
                    "Pixel size not provided and does not appear in STAR file"
                )

        # flip the phases: multiply the centered spectrum by the sign of the CTF
        h = cryo_CTF_Relion(side, star_record_data)
        imhat = fftshift(fft2(proj))
        pfim = ifft2(ifftshift(imhat * np.sign(h)))

        if side % 2 == 1:
            # This test is only valid for odd n
            # images are single precision
            imaginery_comp = np.linalg.norm(np.imag(pfim)) / np.linalg.norm(pfim)
            if imaginery_comp > 5.0e-7:
                logger.warning(
                    f"Large imaginary components in image {image_idx}"
                    f" in stack {stack_name} = {imaginery_comp}")

        pfim = np.real(pfim)
        projections[idx, :, :] = pfim.astype('float32')

    if projections is None:
        # no records at all - return an empty stack instead of failing on an unbound name
        projections = np.zeros((0, 0, 0), dtype='float32')

    return projections