def relion_star_downgrade(star_file):
    """Write a RELION 3.0 style copy of a RELION 3.1 STAR file for Warp.

    The optics block is merged into the particles block, the per-particle
    shifts (stored in angstroms) are converted to pixels and subtracted from
    the particle coordinates. The output table holds the micrograph name, the
    shifted coordinates and the Euler angles, and is written next to the
    input file as ``<stem>_rln3.0.star`` (overwriting any existing file).

    Parameters
    ----------
    star_file : path to the RELION 3.1 STAR file; must contain 'particles'
        and 'optics' data blocks.
    """
    blocks = starfile.read(star_file)

    # Attach the optics columns (e.g. pixel size) to every particle row
    particles = blocks['particles'].merge(blocks['optics'])

    # Column names needed from the 3.1 style file
    # (Warp expects RELION 3.0 style input for particle extraction)
    coord_cols = ['rlnCoordinateX', 'rlnCoordinateY', 'rlnCoordinateZ']
    shift_cols = ['rlnOriginXAngst', 'rlnOriginYAngst', 'rlnOriginZAngst']
    angle_cols = ['rlnAngleRot', 'rlnAngleTilt', 'rlnAnglePsi']

    positions = particles[coord_cols].to_numpy()
    shifts_angstrom = particles[shift_cols].to_numpy()
    # Column vector, so the division broadcasts over the three axes
    pixel_size = particles['rlnImagePixelSize'].to_numpy().reshape((-1, 1))
    angles = particles[angle_cols].to_numpy()

    micrograph_names = particles['rlnMicrographName']

    # RELION 3.0 stores shifts in pixels; fold them into the positions
    shifted_positions = positions - shifts_angstrom / pixel_size

    # Assemble the output table: name, then coordinates, then angles
    df = pd.DataFrame({'rlnMicrographName': micrograph_names})
    for headings, values in ((coord_cols, shifted_positions), (angle_cols, angles)):
        for column, heading in enumerate(headings):
            df[heading] = values[:, column]

    # Output lives alongside the input, with a suffix appended to the stem
    source = Path(star_file)
    output_filename = source.parent / f'{source.stem}_rln3.0.star'

    starfile.write(df, output_filename, overwrite=True)
    click.echo(f'Done! Wrote RELION 3.0 format STAR file to {output_filename}')
def read_star(star_path, **kwargs):
    """
    Read a STAR file and hand its contents to the first reader that accepts it.

    Every entry of ``reader_functions`` is tried in order with the parsed
    data, ``star_path`` and any extra keyword arguments; the result of the
    first one that does not raise ``ParseError`` is returned.

    Raises ParseError if the file cannot be parsed as tabular data at all,
    or if every reader function rejects it.
    """
    try:
        star_contents = starfile.read(star_path, always_dict=True)
    except pd.errors.EmptyDataError:
        # some .star files holding entirely different data trigger this
        raise ParseError(f'the contents of {star_path} have the wrong format')

    rejected = []
    for style, reader_function in reader_functions.items():
        try:
            blocks = reader_function(star_contents, star_path=star_path, **kwargs)
        except ParseError:
            # remember which reader failed so the final error can list them
            rejected.append((style, reader_function))
        else:
            return blocks

    raise ParseError(f'Failed to parse {star_path} using {rejected}')
def get_dist(fileIn):
    """Return the whitespace-split data rows from a slice of a text file.

    Only the lines in ``[line_start, line_end)`` are read. Both bounds are
    module-level names that must be assigned before this function is called.
    The ``loop_`` keyword, ``_rln`` column labels and blank lines are dropped;
    every remaining line is split on whitespace.
    """
    with open(fileIn, 'r') as f_in:
        # islice is lazy, so the lines must be consumed while the file is open
        stripped = [raw.strip() for raw in islice(f_in, line_start, line_end)]
    return [
        text.split()
        for text in stripped
        if text and text != 'loop_' and not text.startswith('_rln')
    ]


df = starfile.read(data_star_file)
particle_number = df['particles']['rlnMicrographName'].shape[0]

for index, model_file in enumerate(model_star_files):
    print("\n")
    print(colored("ITERATION -------->", 'red'), colored(index, 'green'))
    # get_dist() reads these two module-level bounds
    line_start, line_end = line_numbers(model_file)
    data = get_dist(model_file)
    for row in data:
        # second column is treated as a fraction of the total particle count
        fraction = float(row[1])
        percentage = fraction * 100
        pcles_in_class = round(fraction * particle_number)
        print("{} :========> {:.2f} % | ~{} particles".format(row[0], percentage, pcles_in_class))
def test_read_non_existent_file(self):
    """Call ``starfile.read`` on a path that is confirmed not to exist."""
    missing = Path('non-existent-file.star')
    # guard: the scenario only makes sense if the file really is absent
    assert not missing.exists()
    starfile.read(missing)
def read_cbox(cbox_path, name_regex=None, pixel_size=None, **kwargs):
    """
    Read the 'cryolo' block of a cbox file into a ParticleBlock.

    Coordinates come from the CoordinateX/Y/Z columns. Every particle is
    given an identity matrix as its orientation. The block name is derived
    from the file path via ``guess_name``. Extra keyword arguments are
    accepted but not used here.
    """
    cryolo_table = starfile.read(cbox_path)['cryolo']

    coordinate_columns = ['CoordinateX', 'CoordinateY', 'CoordinateZ']
    positions = cryolo_table[coordinate_columns].to_numpy()

    # one 3x3 identity matrix per particle -> shape (n, 3, 3)
    n_particles = len(cryolo_table)
    identity_stack = np.tile(np.eye(3), (n_particles, 1, 1))

    block_name = guess_name(cbox_path, name_regex)
    return ParticleBlock(positions, identity_stack, name=block_name, pixel_size=pixel_size)
def warp2dynamo(warp_star_file, output_dynamo_table_file, extracted_box_size):
    """
    Convert a Warp STAR file into Dynamo input files.

    Files written:
      1) a Dynamo table plus its table map (.doc)
      2) a Dynamo STAR file used as a data container (to avoid reextraction)
      3) a second table for reextraction into a Dynamo data folder, together
         with a one-line script holding the matching ``dtcrop`` command
         (the STAR container did not work for alignment projects for the
         original author)

    Parameters
    ----------
    warp_star_file : STAR file produced by Warp
    output_dynamo_table_file : requested name of the Dynamo table file
    extracted_box_size : side length of the extracted particle boxes
    """
    warp_table = starfile.read(warp_star_file)

    # Particle positions: rlnCoordinateX/Y/Z -> x/y/z
    dynamo_columns = {
        axis: warp_table['rlnCoordinate' + axis.upper()]
        for axis in ('x', 'y', 'z')
    }

    # Euler angles are optional; convert to the dynamo convention when present
    if 'rlnAngleRot' in warp_table.columns:
        relion_angles = warp_table[['rlnAngleRot', 'rlnAngleTilt', 'rlnAnglePsi']].to_numpy()
        dynamo_angles = convert_eulers(relion_angles,
                                       source_meta='relion',
                                       target_meta='dynamo')
        for column, heading in enumerate(('tdrot', 'tilt', 'narot')):
            dynamo_columns[heading] = dynamo_angles[:, column]

    # Tomogram each particle belongs to
    dynamo_columns['tomo_file'] = warp_table['rlnMicrographName']

    df = pd.DataFrame(dynamo_columns)

    # --- 1) table file and table map
    table_path = sanitise_dynamo_table_filename(output_dynamo_table_file)
    click.echo(
        f"Writing out Dynamo table file '{table_path}' and corresponding table map file with appropriate info...\n")
    dynamotable.write(df, table_path)

    # --- 2) dynamo STAR file (data container, avoids reextraction)
    dynamo_star_name = table_path + '.star'
    click.echo(
        f"Writing out Dynamo format STAR file '{dynamo_star_name}' to avoid reextraction...\n")
    # tags are 1-based particle indices
    dynamo_star = pd.DataFrame({
        'tag': list(range(1, len(df) + 1)),
        'particleFile': warp_table['rlnImageName'],
    })
    starfile.write(dynamo_star, dynamo_star_name)

    # --- 3) reextraction table and .doc file
    # (STAR files with mrc volumes didn't work in dynamo 1.1.509 for the original author)
    reextraction_table_name = reextract_table_filename(table_path)

    # Every particle now sits at the centre of its own extracted box
    half_box = extracted_box_size / 2
    for axis in ('x', 'y', 'z'):
        df[axis] = np.ones_like(df[axis]) * half_box

    # Each "tomogram" is an individual particle volume, numbered by its tag
    df['tomo_file'] = warp_table['rlnImageName']
    df['tomo'] = dynamo_star['tag']

    click.echo(
        "Writing out table and table map to facilitate reextraction if dynamo STAR file doesn't work...")
    click.echo(
        "General reextraction command: dtcrop <tomogram_table_map.doc> <tableForAllTomograms> <outputfolder> <sidelength> -asBoxes 1")

    table_map_name = reextraction_table_name.replace('.tbl', '.doc')
    extraction_command = f"dtcrop {table_map_name} {reextraction_table_name} <outputfolder> {extracted_box_size} -asBoxes 1"

    reextraction_matlab = table_path.replace('.tbl', 'reextraction_script.m')
    with open(reextraction_matlab, 'w') as script:
        script.write(f'{extraction_command}\n')

    dynamotable.write(df, reextraction_table_name)

    click.echo(f"\nDone! Converted Warp output '{warp_star_file}' into Dynamo input files")