def main(args):
    """Split a particle coordinates file into one file per micrograph.

    Reads ``args.file`` in the format given by ``args._from`` (detected with
    ``file_utils.detect_format`` when it is ``'auto'``), drops rows whose
    score is below ``args.threshold`` when a score column is present, and
    writes each micrograph's rows to
    ``args.output + '/' + <image name> + args.suffix + <input extension>``.

    STAR input is written back out with ``star.write``; every other format is
    treated as a tab-delimited coordinates table. Exits with status 1 when
    the format cannot be detected from the file extension.
    """
    fmt = args._from
    path = args.file

    # detect the input file format from the extension if requested
    if fmt == 'auto':
        try:
            fmt = file_utils.detect_format(path)
        except file_utils.UnknownFormatError as e:
            print('Error: unrecognized input coordinates file extension (' + e.ext + ')', file=sys.stderr)
            sys.exit(1)

    # output files reuse the extension of the input file
    ext = os.path.splitext(path)[1]

    suffix = args.suffix
    t = args.threshold
    base = args.output

    def destination(name):
        # per-micrograph output location inside the output directory
        return base + '/' + name + suffix + ext

    if fmt == 'star':
        with open(path, 'r') as f:
            table = star.parse(f)

        # apply score threshold
        if star.SCORE_COLUMN_NAME in table.columns:
            keep = table[star.SCORE_COLUMN_NAME] >= t
            table = table.loc[keep]

        # write per micrograph files, named without the image extension
        for micrograph, group in table.groupby('MicrographName'):
            stem = os.path.splitext(micrograph)[0]
            with open(destination(stem), 'w') as f:
                star.write(group, f)
        return

    # any other format is read as a tab-delimited coordinates table
    table = pd.read_csv(path, sep='\t')
    if 'score' in table.columns:
        keep = table['score'] >= t
        table = table.loc[keep]

    # write per micrograph files
    for image_name, group in table.groupby('image_name'):
        group.to_csv(destination(image_name), sep='\t', index=False)
def read_coordinates(path, format='auto'):
    """Load particle coordinates from ``path`` as a table.

    ``format`` selects the reader: ``'star'``, ``'box'``, ``'csv'`` (VIA CSV)
    or anything else for a tab-delimited coordinates table. With ``'auto'``
    the format is chosen by ``detect_format(path)``.

    For STAR files the score, micrograph name, x/y coordinate and microscope
    metadata columns that are present are renamed to the standard names
    (``score``, ``image_name``, ``x_coord``, ``y_coord``, ``voltage``,
    ``detector_pixel_size``, ``magnification``, ``amplitude_contrast``) and
    ``strip_ext`` is applied to the image names.
    """
    if format == 'auto':
        format = detect_format(path)

    if format == 'box':
        box = read_box(path)
        # the image name is the file name of the box file without extension
        stem = os.path.splitext(path)[0]
        return boxes_to_coordinates(box, image_name=os.path.basename(stem))

    if format == 'csv':
        # this is VIA CSV format
        return read_via_csv(path)

    if format != 'star':
        # default to coordinates table format
        return pd.read_csv(path, sep='\t')

    with open(path, 'r') as f:
        table = star.parse(f)

    # standardize the image name, x, y, score and metadata column names
    standard_names = {
        star.SCORE_COLUMN_NAME: 'score',
        'MicrographName': 'image_name',
        star.X_COLUMN_NAME: 'x_coord',
        star.Y_COLUMN_NAME: 'y_coord',
        star.VOLTAGE: 'voltage',
        star.DETECTOR_PIXEL_SIZE: 'detector_pixel_size',
        star.MAGNIFICATION: 'magnification',
        star.AMPLITUDE_CONTRAST: 'amplitude_contrast',
    }
    for star_name, standard_name in standard_names.items():
        if star_name not in table.columns:
            continue
        # copy under the new name, then remove the STAR-named column
        table[standard_name] = table[star_name]
        table = table.drop(star_name, axis=1)

    # strip off image extension
    table['image_name'] = table['image_name'].apply(strip_ext)
    return table
def main(args):
    """Convert a STAR particle file into a tab-delimited coordinates table.

    Parses ``args.file`` with ``star.parse``, keeps the micrograph name and
    x/y coordinate columns (plus ``ParticleScore`` when present), renames
    them to ``image_name``, ``x_coord``, ``y_coord`` (and ``score``),
    truncates the coordinates to integers, applies ``strip_ext`` to the image
    names, and writes the table to ``args.output`` or, when that is ``None``,
    to stdout.
    """
    with open(args.file, 'r') as f:
        table = star.parse(f)

    # columns of interest and the names they get in the output table
    star_columns = ['MicrographName', 'CoordinateX', 'CoordinateY']
    output_columns = ['image_name', 'x_coord', 'y_coord']
    if 'ParticleScore' in table.columns:
        star_columns.append('ParticleScore')
        output_columns.append('score')

    table = table[star_columns]
    table.columns = output_columns

    # convert the coordinates to integers (via float, then truncated)
    for axis in ('x_coord', 'y_coord'):
        table[axis] = table[axis].astype(float).astype(int)

    # strip file extensions off the image names if present
    table['image_name'] = table['image_name'].apply(strip_ext)

    out = args.output if args.output is not None else sys.stdout
    table.to_csv(out, sep='\t', index=False)
def _exit_with_error(message):
    """Print ``message`` to stderr and terminate the process with status 1."""
    print(message, file=sys.stderr)
    sys.exit(1)


def main(args):
    """Convert particle coordinate files between formats.

    Reads every path in ``args.files`` (format ``args._from``, or detected
    per file with ``file_utils.detect_format`` when it is ``'auto'``),
    concatenates them, drops particles scoring below ``args.threshold`` when
    a score column exists, multiplies the coordinates by
    ``args.up_scale / args.down_scale``, adds the voltage / detector pixel
    size / magnification / amplitude contrast metadata when the respective
    argument is > 0, and writes the result in the ``args.to`` format.

    Output format resolution when ``args.to`` is ``'auto'``:
      * writing to stdout (``args.output is None``): the single input format
        is reused; more than one input format is an error.
      * ``args.output`` ends with ``'/'``: ``'box'`` if ``args.boxsize > 0``
        else ``'json'``; both require ``args.image_ext``.
      * otherwise the format is detected from the output file extension.

    When all inputs and the output are STAR files the tables are passed
    through ``star.parse`` / ``star.write`` directly. Otherwise the files are
    read with ``file_utils.read_coordinates``, optionally mirrored along the
    y axis (``args.invert_y``, which needs ``args.imagedir`` to look up image
    heights) and written with ``file_utils.write_coordinates``.

    All diagnostics and error messages go to stderr so that they never mix
    with converted output written to stdout. Any usage error terminates the
    process with exit status 1.
    """
    verbose = args.verbose

    form = args._from
    from_forms = [form for _ in args.files]

    # detect the input file formats
    if form == 'auto':
        try:
            from_forms = [file_utils.detect_format(path) for path in args.files]
        except file_utils.UnknownFormatError as e:
            _exit_with_error('Error: unrecognized input coordinates file extension (' + e.ext + ')')
    formats_detected = list(set(from_forms))
    if verbose > 0:
        print('# INPUT formats detected: ' + str(formats_detected), file=sys.stderr)

    # determine the output file format
    output_path = args.output
    output = None
    to_form = args.to
    if output_path is None:
        output = sys.stdout
        # if output is to stdout and the output format is not set,
        # fall back to the input format when it is unambiguous
        if to_form == 'auto':
            if len(formats_detected) == 1:
                # write the same output format as input format
                to_form = from_forms[0]
            else:
                _exit_with_error(
                    'Error: writing file to stdout and multiple input formats present with no output format (--to) set! Please tell me what format to write!'
                )
        if to_form == 'box' or to_form == 'json':
            _exit_with_error(
                'Error: writing BOX or JSON output files requires a destination directory. Please set the --output parameter!'
            )

    image_ext = args.image_ext
    boxsize = args.boxsize
    if to_form == 'auto':
        # first check for directory; endswith() also copes with an empty path
        if output_path.endswith('/'):
            # image-ext must be set for these file formats
            if image_ext is None:
                _exit_with_error(
                    'Error: writing BOX or JSON output files requires setting the image file extension!'
                )
            # format is either json or box, check for boxsize to decide
            if boxsize > 0:
                # write boxes!
                if verbose > 0:
                    print('# Detected output format is BOX, because OUTPUT is a directory and boxsize > 0.', file=sys.stderr)
                to_form = 'box'
            else:
                if verbose > 0:
                    print('# Detected output format is JSON, because OUTPUT is a directory and no boxsize set.', file=sys.stderr)
                to_form = 'json'
        else:
            try:
                to_form = file_utils.detect_format(output_path)
            except file_utils.UnknownFormatError as e:
                _exit_with_error('Error: unrecognized output coordinates file extension (' + e.ext + ')')
    if verbose > 0:
        # stderr, like the other diagnostics, so stdout output stays clean
        print('# OUTPUT format: ' + to_form, file=sys.stderr)

    suffix = args.suffix

    t = args.threshold
    down_scale = args.down_scale
    up_scale = args.up_scale
    scale = up_scale / down_scale

    # special case when inputs and outputs are all star files
    if len(formats_detected) == 1 and formats_detected[0] == 'star' and to_form == 'star':
        dfs = []
        for path in args.files:
            with open(path, 'r') as f:
                dfs.append(star.parse(f))
        table = pd.concat(dfs, axis=0)

        # apply score threshold
        if star.SCORE_COLUMN_NAME in table.columns:
            table = table.loc[table[star.SCORE_COLUMN_NAME] >= t]

        # scale coordinates, rounding to the nearest integer
        if scale != 1:
            for column in (star.X_COLUMN_NAME, star.Y_COLUMN_NAME):
                table[column] = np.round(scale * table[column].values).astype(int)

        # add metadata if specified
        if args.voltage > 0:
            table[star.VOLTAGE] = args.voltage
        if args.detector_pixel_size > 0:
            table[star.DETECTOR_PIXEL_SIZE] = args.detector_pixel_size
        if args.magnification > 0:
            table[star.MAGNIFICATION] = args.magnification
        if args.amplitude_contrast > 0:
            table[star.AMPLITUDE_CONTRAST] = args.amplitude_contrast

        # write output file
        if output is None:
            with open(output_path, 'w') as f:
                star.write(table, f)
        else:
            star.write(table, output)
        return

    # general case
    # read the input files
    dfs = [
        file_utils.read_coordinates(path, format=from_form)
        for path, from_form in zip(args.files, from_forms)
    ]
    coords = pd.concat(dfs, axis=0)

    # threshold particles by score (if there is a score)
    if 'score' in coords.columns:
        coords = coords.loc[coords['score'] >= t]

    # scale coordinates, rounding to the nearest integer
    if scale != 1:
        for column in ('x_coord', 'y_coord'):
            coords[column] = np.round(scale * coords[column].values).astype(int)

    # add metadata if specified
    if args.voltage > 0:
        coords['voltage'] = args.voltage
    if args.detector_pixel_size > 0:
        coords['detector_pixel_size'] = args.detector_pixel_size
    if args.magnification > 0:
        coords['magnification'] = args.magnification
    if args.amplitude_contrast > 0:
        coords['amplitude_contrast'] = args.amplitude_contrast

    # invert y-axis coordinates if specified
    if args.invert_y:
        if args.imagedir is None:
            _exit_with_error(
                'Error: --imagedir must specify the directory of images in order to mirror the y-axis coordinates'
            )
        dfs = []
        for image_name, group in coords.groupby('image_name'):
            impath = os.path.join(args.imagedir, image_name) + '.' + args.image_ext
            # use glob in case image_ext is '*'
            matches = glob.glob(impath)
            if not matches:
                # report the missing image instead of a bare IndexError
                _exit_with_error('Error: no image file found matching ' + impath)
            im = load_image(matches[0])
            dfs.append(mirror_y_axis(group, im.height))
        coords = pd.concat(dfs, axis=0)

    # output file format is decided and coordinates are processed, now write files
    if to_form == 'box' or to_form == 'json':
        # these writers receive the destination directory path itself
        file_utils.write_coordinates(output_path, coords, format=to_form, boxsize=boxsize, image_ext=image_ext, suffix=suffix)
    elif output is None:
        # make sure the output file is flushed and closed once written
        with open(output_path, 'w') as f:
            file_utils.write_coordinates(f, coords, format=to_form, boxsize=boxsize, image_ext=image_ext, suffix=suffix)
    else:
        file_utils.write_coordinates(output, coords, format=to_form, boxsize=boxsize, image_ext=image_ext, suffix=suffix)