def star_group(star, group):
	"""Regroup the particles of a star file so every group has >= `group` particles.

	star  : path to a RELION star file (first column is assumed to be
	        _rlnMicrographName, since lines are keyed by split()[0]).
	group : minimal number of particles per group.

	Returns the list of rewritten particle lines (`lines_new`) produced by the
	sibling helper `lines_group`, which is presumed to append group name/number
	fields — defined elsewhere in this file; verify there. `getkey` (sort key,
	presumably defocusU per the comment below) and `p3s.star_parse` are also
	external to this block.
	"""
	# calculate the number of particles in each micrograph
	star_dict = p3s.star_parse(star, 'data_')
	header_len = len(star_dict['data_'])+len(star_dict['loop_'])
	num_dict = {}
	with open(star) as read_star:
		# [header_len:-1] skips the header and drops the trailing blank line
		lines = read_star.readlines()[header_len:-1]
	for line in lines:
		MicrographName = line.split()[0]
		num_dict[MicrographName] = num_dict.get(MicrographName, 0) + 1
	# sort by defocusU
	lines = sorted(lines, key=getkey)
	# group, if less than args.group, and transfer to lines_new
	lines_new = []
	group_num = 0
	# consume `lines` front-to-back, peeling off one group per iteration
	while len(lines) > 0:
		MicrographName = lines[0].split()[0]
		num = num_dict[MicrographName]
		if num >= group:
			# this micrograph alone is big enough to be its own group
			# check if already reach the end
			len_lines = len(lines)
			if num == len_lines:
				# new group (last flag 1 presumably means "start a new group")
				lines_new, group_num = lines_group(lines_new, lines, group_num, 1)
				break
			# find the first line belonging to the next micrograph
			for good, line_good in enumerate(lines):
				if line_good.split()[0] != MicrographName:break
			# new group
			lines_new, group_num = lines_group(lines_new, lines[:good], group_num, 1)
			lines = lines[good:]
		else:
			# accumulate consecutive micrographs until their particle total
			# reaches `group`; `sets` records which micrographs were merged
			i = 0
			sets = set()
			while num < group:
				for line in lines[:i+1]:
					MicrographName2 = line.split()[0]
					sets.add(MicrographName2)
				# recompute the running total from scratch over the merged set
				num = 0
				for micrograph in sets:
					num += num_dict[micrograph]
				if i < len(lines)-1:
					i += 1
					end = 0
				else:
					# ran out of lines before reaching `group`
					end = 1
					break
			if end == 0:
				# extend past index i to the end of MicrographName2's particles
				for j, line2 in enumerate(lines[i:]):
					if line2.split()[0] != MicrographName2:break
				# new group
				lines_new, group_num = lines_group(lines_new, lines[:i+j], group_num, 1)
				lines = lines[i+j:]
				print 'Grouping {} into group_{:05}!\n'.format(sets, group_num)
			if end == 1:
				# not new group: leftovers too small, merge into the previous group
				lines_new, group_num = lines_group(lines_new, lines, group_num, 0)
				print 'Particles in {} cannot add up to more than {}, so they were grouped to the previous group: group_{:05}!\n'.format(sets, group, group_num)
				break
	return lines_new
def main(): progname = os.path.basename(sys.argv[0]) usage = progname + """ [options] <_data.star> Reconstruct from randomly selected particles from _data.star. Needs: relion (v1.4, Scheres, 2012) """ args_def = {'repeat':1000, 'apix':1.25, 'maxres':6, 'walltime':1} parser = argparse.ArgumentParser() parser.add_argument("star", nargs='*', help="specify _data.star") parser.add_argument("-r", "--repeat", type=int, help="specify how many times you want to repeat the experiment (reconstruct from random particles), by default {}".format(args_def['repeat'])) parser.add_argument("-a", "--apix", type=float, help="specify the apix, by default {}".format(args_def['apix'])) parser.add_argument("-m", "--maxres", type=float, help="specify maximum resolution (in Angstrom) to consider in Fourier space, by default {}".format(args_def['maxres'])) parser.add_argument("-w", "--walltime", type=int, help="specify the walltime (in hour), by default {}".format(args_def['walltime'])) args = parser.parse_args() if len(sys.argv) == 1: print "usage: " + usage print "Please run '" + progname + " -h' for detailed options." 
sys.exit(1) # get default values for i in args_def: if args.__dict__[i] == None: args.__dict__[i] = args_def[i] # repeat star = args.star[0] star_dict = p3s.star_parse(star, 'data_images') header = star_dict['data_'] + star_dict['loop_'] for i in xrange(args.repeat): # root name for output out = star[:-10] + '_repeat{:05}'.format(i) # check if output exists if os.path.isfile(out+'.mrc'): continue # write a new random data.star with open(star) as s_read: lines = s_read.readlines()[len(header):-1] l_len = len(lines) new_star = star[:-10] + '_repeat{:05}_data.star'.format(i) with open(new_star, 'w') as s_write: s_write.write(''.join(header)) # randomly select for l_len times for j in xrange(l_len): k = random.randint(0,l_len-1) s_write.write(lines[k]) s_write.write('\n') # write and submit the job cmd = "`which relion_reconstruct` --i {} --o {} --angpix {} --maxres {} --ctf true".format(new_star, out+'.mrc', args.apix, args.maxres) walltime, cpu, ptile = args.walltime, 1, 1 p3c.ada(cmd, out, walltime, cpu, ptile)
def main():
	"""Merge star files into <root>_merged.star with a fixed 13-column layout,
	then write a grouped version (<root>_merged_grouped.star) whose extra
	group columns come from the sibling `star_group` helper."""
	progname = os.path.basename(sys.argv[0])
	usage = progname + """ [options] <star files>
	Merge star files (including grouping).
	"""
	args_def = {'group':50, 'root':'zz'}
	parser = argparse.ArgumentParser()
	parser.add_argument("star", nargs='*', help="specify star files to be merged")
	parser.add_argument("-g", "--group", type=int, help="specify the minimal number of particles for one group, by default {}".format(args_def['group']))
	parser.add_argument("-r", "--root", help="specify rootname for output, by default '{}'".format(args_def['root']))
	args = parser.parse_args()
	if len(sys.argv) == 1:
		print "usage: " + usage
		print "Please run '" + progname + " -h' for detailed options."
		sys.exit(1)
	# get default values
	for i in args_def:
		if args.__dict__[i] == None:
			args.__dict__[i] = args_def[i]
	#
	merged = args.root + '_merged.star'
	write_merge = open(merged, 'w')
	# fixed output header: the merged file always carries exactly these 13
	# columns in this order, regardless of the input files' column order
	header = '\ndata_\n\nloop_ \n_rlnMicrographName #1 \n_rlnCoordinateX #2 \n_rlnCoordinateY #3 \n_rlnImageName #4 \n_rlnDefocusU #5 \n_rlnDefocusV #6 \n_rlnDefocusAngle #7 \n_rlnVoltage #8 \n_rlnSphericalAberration #9 \n_rlnAmplitudeContrast #10 \n_rlnMagnification #11 \n_rlnDetectorPixelSize #12 \n_rlnCtfFigureOfMerit #13 \n'
	write_merge.write(header)
	for star in args.star:
		# star_dict maps each _rln label to its column index in this file
		star_dict = p3s.star_parse(star, 'data_')
		header_len = len(star_dict['data_'])+len(star_dict['loop_'])
		with open(star) as read_star:
			for line in read_star.readlines()[header_len:-1]:
				l = line.split()
				# build a 13-tuple in the fixed column order; ' \n' is glued
				# onto the last field so the join below terminates the line
				line_new = l[star_dict['_rlnMicrographName']], l[star_dict['_rlnCoordinateX']], l[star_dict['_rlnCoordinateY']], l[star_dict['_rlnImageName']], l[star_dict['_rlnDefocusU']], l[star_dict['_rlnDefocusV']], l[star_dict['_rlnDefocusAngle']], l[star_dict['_rlnVoltage']], l[star_dict['_rlnSphericalAberration']], l[star_dict['_rlnAmplitudeContrast']], l[star_dict['_rlnMagnification']], l[star_dict['_rlnDetectorPixelSize']], l[star_dict['_rlnCtfFigureOfMerit']] + ' \n'
				write_merge.write(' '.join(line_new))
	# trailing blank line, so downstream readers can slice with [:-1]
	write_merge.write(' \n')
	write_merge.close()
	print 'The merged star has been written in {}!\n'.format(merged)
	grouped = args.root + '_merged_grouped.star'
	with open(grouped, 'w') as write_group:
		write_group.write(header)
		# two extra columns appended by star_group/lines_group — presumably
		# group name and number; verify against those helpers
		write_group.write('_rlnGroupName #14 \n_rlnGroupNumber #15 \n')
		write_group.write(''.join(star_group(merged, args.group)))
		write_group.write(' \n')
	print 'The grouped star has been written in {}!\n'.format(grouped)
def main(): progname = os.path.basename(sys.argv[0]) usage = ( progname + """ [options] <a star file> Scale the OriginX/Y and DetectorPixelSize. """ ) args_def = {"scale": 1, "reset": 0} parser = argparse.ArgumentParser() parser.add_argument("star", nargs="*", help="specify a star file to be processed") parser.add_argument( "-s", "--scale", type=float, help="specify the down scaling factor, by default {}. e.g., 0.5 means downscaled by 0.5 times".format( args_def["scale"] ), ) parser.add_argument( "-r", "--reset", type=float, help="specify as 1 to reset the _rlnOriginX and _rlnOriginY only (will not change _rlnDetectorPixelSize), by default {}".format( args_def["reset"] ), ) args = parser.parse_args() if len(sys.argv) == 1: print "usage: " + usage print "Please run '" + progname + " -h' for detailed options." sys.exit(1) # get default values for i in args_def: if args.__dict__[i] == None: args.__dict__[i] = args_def[i] # star = args.star[0] basename = os.path.basename(os.path.splitext(star)[0]) scaled = "{}_scaled_{}.star".format(basename, args.scale) write_scale = open(scaled, "w") star_dict = p3s.star_parse(star, "data_") # get _rlnDetectorPixelSize, _rlnOriginX, _rlnOriginY dps, ox, oy = star_dict["_rlnDetectorPixelSize"], star_dict["_rlnOriginX"], star_dict["_rlnOriginY"] # write header header = star_dict["data_"] + star_dict["loop_"] write_scale.write("".join(header)) header_len = len(header) with open(star) as read_star: lines = read_star.readlines()[header_len:-1] for line in lines: line = line.split() line[dps] = str(float(line[dps]) * args.scale) line[ox] = str(float(line[ox]) / args.scale) line[oy] = str(float(line[oy]) / args.scale) if args.reset == 1: line[ox] = "0" line[oy] = "0" write_scale.write(" ".join(line) + "\n") write_scale.write(" \n") write_scale.close() print "The scaled star file has been written in {}!".format(scaled)
def main():
	"""Screen a star file three ways: by item+cutoff (-s -c, two outputs:
	greater-than and less-or-equal), by item + black/whitelist file (-s -sf,
	exact match on that column), or by black/whitelist file alone (-sf,
	substring match anywhere in the line)."""
	progname = os.path.basename(sys.argv[0])
	usage = progname + """ [options] <a star file>
	Write two star files after screening by an item and a cutoff in the star file. Write one star file after screening by a file containing blacklist/whitelist (either keyword or item).
	"""
	args_def = {'screen':'0', 'cutoff':'00', 'sfile':'0', 'white':0}
	parser = argparse.ArgumentParser()
	parser.add_argument("star", nargs='*', help="specify a star file to be screened")
	parser.add_argument("-s", "--screen", type=str, help="specify the item, by which the star file will be screened, by default {} (no screening). e.g., 'OriginX'".format(args_def['screen']))
	parser.add_argument("-c", "--cutoff", type=str, help="specify the cutoff, by default '{}' (-s and -sf will be combined)".format(args_def['cutoff']))
	parser.add_argument("-sf", "--sfile", type=str, help="specify a file containing a keyword each line, by default '{}' (no screening). e.g., 'f.txt'".format(args_def['sfile']))
	parser.add_argument("-w", "--white", type=int, help="specify as 1 if you provide a whitelist in -sf".format(args_def['white']))
	args = parser.parse_args()
	if len(sys.argv) == 1:
		print "usage: " + usage
		print "Please run '" + progname + " -h' for detailed options."
		sys.exit(1)
	# get default values
	for i in args_def:
		if args.__dict__[i] == None:
			args.__dict__[i] = args_def[i]
	# preprocess -sf: collect non-empty, stripped keywords
	if args.sfile != '0':
		lines_sf = open(args.sfile).readlines()
		lines_sfile = []
		for line in lines_sf:
			line = line.strip()
			if line != '':
				lines_sfile += [line]
	# get the star file
	star = args.star[0]
	basename = os.path.basename(os.path.splitext(star)[0])
	star_dict = p3s.star_parse(star, 'data_')
	header = star_dict['data_'] + star_dict['loop_']
	header_len = len(header)
	with open(star) as read_star:
		# skip the header, drop the trailing blank line
		lines = read_star.readlines()[header_len:-1]
	if args.screen != '0':
		# get the sc number: column index of the requested _rln item
		scn = star_dict['_rln'+args.screen]
		if args.cutoff != '00':
			# numeric split: values > cutoff vs values <= cutoff
			screened1 = '{}_screened_{}-gt-{}.star'.format(basename, args.screen, args.cutoff)
			screened2 = '{}_screened_{}-le-{}.star'.format(basename, args.screen, args.cutoff)
			write_screen1 = open(screened1, 'w')
			write_screen1.write(''.join(header))
			write_screen2 = open(screened2, 'w')
			write_screen2.write(''.join(header))
			for line in lines:
				if float(line.split()[scn]) > float(args.cutoff):
					write_screen1.write(line)
				else:
					write_screen2.write(line)
			write_screen1.write(' \n')
			write_screen1.close()
			write_screen2.write(' \n')
			write_screen2.close()
			print 'The screened star files have been written in {} and {}!'.format(screened1, screened2)
		elif args.sfile != '0':
			# -s combined with -sf: exact match of column scn against the list
			with open('{}_screened.star'.format(basename), 'w') as write_screen:
				write_screen.write(''.join(header))
				if args.white == 0:
					# blacklist: keep lines whose value is NOT listed
					for line in lines:
						key = line.split()[scn]
						if key not in lines_sfile:
							print 'Include {}.'.format(key)
							write_screen.write(line)
				else:
					# whitelist: keep lines whose value IS listed
					for line in lines:
						key = line.split()[scn]
						if key in lines_sfile:
							print 'Include {}.'.format(key)
							write_screen.write(line)
				write_screen.write(' \n')
	elif args.sfile != '0':
		# -sf alone: substring match of each keyword anywhere in the line
		with open('{}_screened.star'.format(basename), 'w') as write_screen:
			write_screen.write(''.join(header))
			if args.white == 0:
				# blacklist: skip a line as soon as any keyword is found in it
				for line in lines:
					skip = 0
					for key in lines_sfile:
						if key in line:
							skip = 1
							print 'Skip {}.'.format(key)
							break
					if skip == 0:
						write_screen.write(line)
			else:
				# whitelist: keep a line as soon as any keyword is found in it
				for line in lines:
					for key in lines_sfile:
						if key in line:
							print 'Include {}.'.format(key)
							write_screen.write(line)
							break
			write_screen.write(' \n')
def main():
	"""Output particle coordinates from star files as per-micrograph .star
	(and optionally EMAN-style .box) files, subtracting origin offsets when
	present and optionally excluding particles near the micrograph edge."""
	progname = os.path.basename(sys.argv[0])
	usage = progname + """ [options] <star files>
	Output the coordinates from star files. Origin offsets will be considered as integers.
	"""
	args_def = {'box':-1, 'edge':-1, 'x':3710, 'y':3838}
	parser = argparse.ArgumentParser()
	parser.add_argument("star", nargs='*', help="specify star files to be processed")
	parser.add_argument("-b", "--box", type=int, help="specify a box size (in pixel) for output, by default {} (output .star only)".format(args_def['box']))
	parser.add_argument("-e", "--edge", type=int, help="specify a distance (in pixel) between box center and micrograph edge, by default {} (don't exclude edge)".format(args_def['edge']))
	parser.add_argument("-x", "--x", type=int, help="provide the x dimension (in pixel) of micrographs, by default {}".format(args_def['x']))
	parser.add_argument("-y", "--y", type=int, help="provide the y dimension (in pixel) of micrographs, by default {}".format(args_def['y']))
	args = parser.parse_args()
	if len(sys.argv) == 1:
		print "usage: " + usage
		print "Please run '" + progname + " -h' for detailed options."
		sys.exit(1)
	# get default values
	for i in args_def:
		if args.__dict__[i] == None:
			args.__dict__[i] = args_def[i]
	# loop over all input files
	for star in args.star:
		star_dict = p3s.star_parse(star, 'data_')
		header_len = len(star_dict['data_'])+len(star_dict['loop_'])
		# if the star is after the particle extraction step
		if '_rlnImageName' in star_dict:
			out_dict = {}
			with open(star) as s_read:
				lines = s_read.readlines()[header_len:-1]
			# loop over lines, generate a dict: {out_name:[{ptcl#:line#}]}
			for j, line in enumerate(lines):
				line = line.split()
				# _rlnImageName looks like 'NNN@stackfile'; split the two parts
				num, rlnImageName = line[star_dict['_rlnImageName']].split('@')
				# name the output by _rlnImageName; [:-5] drops a 5-char
				# extension (presumably '.mrcs' — TODO confirm)
				out_name = os.path.basename(os.path.splitext(rlnImageName[:-5])[0])
				out_dict[out_name] = out_dict.get(out_name, []) + [{num:j}]
			# loop over out_dict, write coords for each key
			for out_name in out_dict:
				out = out_name+'.star'
				if args.box != -1:
					out_box = out_name+'.box'
					# append mode: repeated runs accumulate into the same file
					o_box_write = open(out_box, 'a')
				with open(out, 'a') as o_write:
					o_write.write('\ndata_\n\nloop_ \n_rlnCoordinateX #1 \n_rlnCoordinateY #2 \n')
					# the value of outname is a list, containing dictionaries,
					# which is sorted by the keys (ptcl#) of the dictionaries
					# (NOTE: sorting dicts works in Python 2 only)
					for d in sorted(out_dict[out_name]):
						# d.values()[0] is the line index (Python 2 list view)
						line = lines[d.values()[0]].split()
						# get old coord
						x, y = float(line[star_dict['_rlnCoordinateX']]), float(line[star_dict['_rlnCoordinateY']])
						# calculate new coord: subtract refined origin offsets
						if '_rlnOriginX' in star_dict:
							x -= float(line[star_dict['_rlnOriginX']])
							y -= float(line[star_dict['_rlnOriginY']])
						# exclude the edge
						if args.edge != -1:
							if not args.edge<=x<=args.x-args.edge or not args.edge<=y<=args.y-args.edge:
								continue
						o_write.write('{:>12} '.format(x) + '{:>12} \n'.format(y))
						if args.box != -1:
							# .box rows: lower-left corner, then width and height
							o_box_write.write('{}'.format(x-args.box/2.0) + '\t{}'.format(y-args.box/2.0) + '\t{}'.format(args.box) * 2 + '\n')
					o_write.write('\n')
				if args.box != -1:
					o_box_write.close()
		# else it is before the particle extraction step, so you must want to convert star to box
		elif args.box != -1:
			basename = os.path.basename(os.path.splitext(star)[0])
			with open(star) as s_read:
				lines = s_read.readlines()[header_len:-1]
			with open(basename+'.box', 'w') as o_box_write:
				for line in lines:
					line = line.split()
					# get old coord
					x, y = float(line[star_dict['_rlnCoordinateX']]), float(line[star_dict['_rlnCoordinateY']])
					# exclude the edge
					if args.edge != -1:
						if not args.edge<=x<=args.x-args.edge or not args.edge<=y<=args.y-args.edge:
							continue
					o_box_write.write('{}'.format(x-args.box/2.0) + '\t{}'.format(y-args.box/2.0) + '\t{}'.format(args.box) * 2 + '\n')