Ejemplo n.º 1
0
def _micrograph_run_end(lines, start, name):
	# Return the index of the first line at or after `start` whose first
	# column differs from `name`, or len(lines) when every remaining line
	# still belongs to `name`.
	end = start
	while end < len(lines) and lines[end].split()[0] == name:
		end += 1
	return end


def star_group(star, group):
	"""Cut the particle lines of a star file into groups of at least `group` particles.

	The data lines (everything after the header reported by p3s.star_parse,
	minus the last line of the file) are sorted with `getkey` and consumed
	from the front. A micrograph (first column of a line) that has `group` or
	more particles becomes a group of its own. Smaller micrographs are pooled
	with the ones that follow them in sort order until the pool holds at
	least `group` particles. A trailing pool that never reaches `group` is
	passed to `lines_group` with its last argument set to 0 instead of 1.

	Args:
		star: path of the star file to read.
		group: minimal number of particles for one group.

	Returns:
		The list accumulated through the successive `lines_group` calls.
	"""
	star_dict = p3s.star_parse(star, 'data_')
	header_len = len(star_dict['data_']) + len(star_dict['loop_'])
	with open(star) as read_star:
		# skip the header lines and drop the last line of the file
		lines = read_star.readlines()[header_len:-1]
	# calculate the number of particles in each micrograph
	num_dict = {}
	for line in lines:
		MicrographName = line.split()[0]
		num_dict[MicrographName] = num_dict.get(MicrographName, 0) + 1
	# sort with getkey (by defocusU according to the original comment; getkey is defined elsewhere)
	lines = sorted(lines, key=getkey)
	# group, if less than `group`, and transfer to lines_new
	lines_new = []
	group_num = 0
	while lines:
		MicrographName = lines[0].split()[0]
		num = num_dict[MicrographName]
		if num >= group:
			# check if already reach the end
			if num == len(lines):
				# new group
				lines_new, group_num = lines_group(lines_new, lines, group_num, 1)
				break
			# The run may extend to the very end of `lines`; the helper then
			# returns len(lines), so the last line is not left behind (the
			# old for/break scan stopped one short, and could spin forever
			# on a single remaining line).
			good = _micrograph_run_end(lines, 0, MicrographName)
			# new group
			lines_new, group_num = lines_group(lines_new, lines[:good], group_num, 1)
			lines = lines[good:]
		else:
			# Pool micrographs line by line until their total particle count
			# reaches `group` or the lines run out. `num` counts every
			# particle of each micrograph seen so far, not only the lines
			# already walked over.
			sets = set()
			num = 0
			i = 0
			while i < len(lines) and num < group:
				MicrographName2 = lines[i].split()[0]
				if MicrographName2 not in sets:
					sets.add(MicrographName2)
					num += num_dict[MicrographName2]
				i += 1
			if num >= group:
				# extend the cut to the end of the last pooled micrograph
				cut = _micrograph_run_end(lines, i, MicrographName2)
				# new group
				lines_new, group_num = lines_group(lines_new, lines[:cut], group_num, 1)
				lines = lines[cut:]
				print('Grouping {} into group_{:05}!\n'.format(sets, group_num))
			else:
				# not new group
				# NOTE(review): if this happens before any group was created,
				# group_num is still 0 here; how lines_group handles that is
				# not visible from this function - confirm.
				lines_new, group_num = lines_group(lines_new, lines, group_num, 0)
				print('Particles in {} cannot add up to more than {}, so they were grouped to the previous group: group_{:05}!\n'.format(sets, group, group_num))
				break
	return lines_new
Ejemplo n.º 2
0
def main():
	"""Write resampled copies of a _data.star file and submit one reconstruction per copy.

	For every repeat index whose '<root>_repeatNNNNN.mrc' does not exist yet,
	a new '<root>_repeatNNNNN_data.star' is written that holds the original
	header followed by as many particle lines as the input has, each drawn at
	random with replacement. A relion_reconstruct command for that file is
	then handed to p3c.ada. '<root>' is the input name minus its last 10
	characters (the length of '_data.star').

	Exits with status 1 after printing the usage text when no star file is
	given on the command line.
	"""
	progname = os.path.basename(sys.argv[0])
	usage = progname + (
		" [options] <_data.star>\n"
		"\tReconstruct from randomly selected particles from _data.star.\n"
		"\tNeeds:\n"
		"\trelion (v1.4, Scheres, 2012)\n"
		"\t"
	)

	args_def = {'repeat':1000, 'apix':1.25, 'maxres':6, 'walltime':1}
	parser = argparse.ArgumentParser()
	parser.add_argument("star", nargs='*', help="specify _data.star")
	parser.add_argument("-r", "--repeat", type=int, help="specify how many times you want to repeat the experiment (reconstruct from random particles), by default {}".format(args_def['repeat']))
	parser.add_argument("-a", "--apix", type=float, help="specify the apix, by default {}".format(args_def['apix']))
	parser.add_argument("-m", "--maxres", type=float, help="specify maximum resolution (in Angstrom) to consider in Fourier space, by default {}".format(args_def['maxres']))
	parser.add_argument("-w", "--walltime", type=int, help="specify the walltime (in hour), by default {}".format(args_def['walltime']))
	args = parser.parse_args()

	# A bare invocation and an invocation with options but no star file both
	# end here; the latter used to crash with IndexError on args.star[0].
	if not args.star:
		print("usage: " + usage)
		print("Please run '" + progname + " -h' for detailed options.")
		sys.exit(1)
	# get default values
	for key in args_def:
		if getattr(args, key) is None:
			setattr(args, key, args_def[key])
	# repeat
	star = args.star[0]
	star_dict = p3s.star_parse(star, 'data_images')
	header = star_dict['data_'] + star_dict['loop_']
	# The particle lines are the same for every repeat, so read them once:
	# everything after the header, minus the last line of the file.
	with open(star) as s_read:
		lines = s_read.readlines()[len(header):-1]
	l_len = len(lines)
	root = star[:-10]
	for i in xrange(args.repeat):
		# root name for output
		out = root + '_repeat{:05}'.format(i)
		# check if output exists
		if os.path.isfile(out+'.mrc'):
			continue
		# write a new random data.star
		new_star = out + '_data.star'
		with open(new_star, 'w') as s_write:
			s_write.write(''.join(header))
			# randomly select for l_len times (with replacement)
			for _ in xrange(l_len):
				s_write.write(random.choice(lines))
			s_write.write('\n')
		# write and submit the job
		cmd = "`which relion_reconstruct` --i {} --o {} --angpix {} --maxres {} --ctf true".format(new_star, out+'.mrc', args.apix, args.maxres)
		walltime, cpu, ptile = args.walltime, 1, 1
		p3c.ada(cmd, out, walltime, cpu, ptile)
Ejemplo n.º 3
0
def main():
	"""Merge star files into '<root>_merged.star' and write a grouped copy.

	Thirteen fixed columns are copied, in a fixed order, from every input
	star file into the merged file. The merged file is then passed to
	star_group and the result is written to '<root>_merged_grouped.star'
	with two extra header entries (_rlnGroupName, _rlnGroupNumber).

	Exits with status 1 after printing the usage text when called without
	any command-line argument.
	"""
	progname = os.path.basename(sys.argv[0])
	usage = progname + (
		" [options] <star files>\n"
		"\tMerge star files (including grouping).\n"
		"\t"
	)

	args_def = {'group':50, 'root':'zz'}
	parser = argparse.ArgumentParser()
	parser.add_argument("star", nargs='*', help="specify star files to be merged")
	parser.add_argument("-g", "--group", type=int, help="specify the minimal number of particles for one group, by default {}".format(args_def['group']))
	parser.add_argument("-r", "--root", help="specify rootname for output, by default '{}'".format(args_def['root']))
	args = parser.parse_args()

	if len(sys.argv) == 1:
		print("usage: " + usage)
		print("Please run '" + progname + " -h' for detailed options.")
		sys.exit(1)
	# get default values
	for key in args_def:
		if getattr(args, key) is None:
			setattr(args, key, args_def[key])
	# Columns of the merged file, in output order. The header is generated
	# from the same tuple so that labels and data cannot drift apart.
	columns = (
		'_rlnMicrographName',
		'_rlnCoordinateX',
		'_rlnCoordinateY',
		'_rlnImageName',
		'_rlnDefocusU',
		'_rlnDefocusV',
		'_rlnDefocusAngle',
		'_rlnVoltage',
		'_rlnSphericalAberration',
		'_rlnAmplitudeContrast',
		'_rlnMagnification',
		'_rlnDetectorPixelSize',
		'_rlnCtfFigureOfMerit',
	)
	header = '\ndata_\n\nloop_ \n' + ''.join('{} #{} \n'.format(label, k) for k, label in enumerate(columns, 1))
	merged = args.root + '_merged.star'
	# `with` closes the merged file even when an input file cannot be parsed
	with open(merged, 'w') as write_merge:
		write_merge.write(header)
		for star in args.star:
			star_dict = p3s.star_parse(star, 'data_')
			header_len = len(star_dict['data_'])+len(star_dict['loop_'])
			with open(star) as read_star:
				# skip the header lines and drop the last line of the file
				data_lines = read_star.readlines()[header_len:-1]
			for line in data_lines:
				l = line.split()
				# star_dict[label] is used as the index of that label's column
				write_merge.write(' '.join([l[star_dict[label]] for label in columns]) + ' \n')
		write_merge.write(' \n')
	print('The merged star has been written in {}!\n'.format(merged))
	grouped = args.root + '_merged_grouped.star'
	with open(grouped, 'w') as write_group:
		write_group.write(header)
		write_group.write('_rlnGroupName #14 \n_rlnGroupNumber #15 \n')
		write_group.write(''.join(star_group(merged, args.group)))
		write_group.write(' \n')
	print('The grouped star has been written in {}!\n'.format(grouped))
Ejemplo n.º 4
0
def main():
    """Write '<basename>_scaled_<scale>.star' with rescaled pixel size and origins.

    For every data line, _rlnDetectorPixelSize is multiplied by --scale and
    _rlnOriginX/_rlnOriginY are divided by it. With --reset 1 the two origin
    columns are written as "0" instead.

    NOTE(review): the --reset help text says _rlnDetectorPixelSize is left
    unchanged, but that column is still multiplied by --scale (as in the
    original code) - confirm which one is intended.

    Exits with status 1 after printing the usage text when no star file is
    given on the command line.
    """
    progname = os.path.basename(sys.argv[0])
    usage = progname + (
        " [options] <a star file>\n"
        "\tScale the OriginX/Y and DetectorPixelSize.\n"
        "\t"
    )

    args_def = {"scale": 1, "reset": 0}
    parser = argparse.ArgumentParser()
    parser.add_argument("star", nargs="*", help="specify a star file to be processed")
    parser.add_argument(
        "-s",
        "--scale",
        type=float,
        help="specify the down scaling factor, by default {}. e.g., 0.5 means downscaled by 0.5 times".format(
            args_def["scale"]
        ),
    )
    parser.add_argument(
        "-r",
        "--reset",
        type=float,
        help="specify as 1 to reset the _rlnOriginX and _rlnOriginY only (will not change _rlnDetectorPixelSize), by default {}".format(
            args_def["reset"]
        ),
    )
    args = parser.parse_args()

    # A bare invocation and an invocation with options but no star file both
    # end here; the latter used to crash with IndexError on args.star[0].
    if not args.star:
        print("usage: " + usage)
        print("Please run '" + progname + " -h' for detailed options.")
        sys.exit(1)
    # get default values
    for key in args_def:
        if getattr(args, key) is None:
            setattr(args, key, args_def[key])

    star = args.star[0]
    basename = os.path.basename(os.path.splitext(star)[0])
    scaled = "{}_scaled_{}.star".format(basename, args.scale)
    star_dict = p3s.star_parse(star, "data_")
    # get _rlnDetectorPixelSize, _rlnOriginX, _rlnOriginY
    dps, ox, oy = star_dict["_rlnDetectorPixelSize"], star_dict["_rlnOriginX"], star_dict["_rlnOriginY"]
    header = star_dict["data_"] + star_dict["loop_"]
    with open(star) as read_star:
        # skip the header lines and drop the last line of the file
        lines = read_star.readlines()[len(header):-1]
    # The output is opened only after the input was parsed and read, so a
    # failure above no longer leaves an empty output file behind, and `with`
    # closes it even if a data line cannot be converted.
    with open(scaled, "w") as write_scale:
        # write header
        write_scale.write("".join(header))
        for line in lines:
            fields = line.split()
            fields[dps] = str(float(fields[dps]) * args.scale)
            if args.reset == 1:
                fields[ox] = "0"
                fields[oy] = "0"
            else:
                fields[ox] = str(float(fields[ox]) / args.scale)
                fields[oy] = str(float(fields[oy]) / args.scale)
            write_scale.write(" ".join(fields) + "\n")
        write_scale.write(" \n")
    print("The scaled star file has been written in {}!".format(scaled))
Ejemplo n.º 5
0
def main():
	"""Screen the particle lines of a star file.

	Three modes, chosen by the options:
	  * --screen ITEM --cutoff C: write two files, one with the lines whose
	    _rlnITEM value is greater than C and one with the remaining lines.
	  * --screen ITEM --sfile F (no cutoff): write '<basename>_screened.star'
	    keeping the lines whose _rlnITEM value is absent from F (blacklist)
	    or, with --white set to anything but 0, present in F (whitelist).
	  * --sfile F alone: same output file, but every non-empty line of F is
	    used as a keyword that is searched for anywhere in each star line.

	NOTE(review): --screen without --cutoff and without --sfile writes
	nothing and prints nothing, as in the original code.

	Exits with status 1 after printing the usage text when no star file is
	given on the command line.
	"""
	progname = os.path.basename(sys.argv[0])
	usage = progname + (
		" [options] <a star file>\n"
		"\tWrite two star files after screening by an item and a cutoff in the star file.\n"
		"\tWrite one star file after screening by a file containing blacklist/whitelist (either keyword or item).\n"
		"\t"
	)

	args_def = {'screen':'0', 'cutoff':'00', 'sfile':'0', 'white':0}
	parser = argparse.ArgumentParser()
	parser.add_argument("star", nargs='*', help="specify a star file to be screened")
	parser.add_argument("-s", "--screen", type=str, help="specify the item, by which the star file will be screened, by default {} (no screening). e.g., 'OriginX'".format(args_def['screen']))
	parser.add_argument("-c", "--cutoff", type=str, help="specify the cutoff, by default '{}' (-s and -sf will be combined)".format(args_def['cutoff']))
	parser.add_argument("-sf", "--sfile", type=str, help="specify a file containing a keyword each line, by default '{}' (no screening). e.g., 'f.txt'".format(args_def['sfile']))
	parser.add_argument("-w", "--white", type=int, help="specify as 1 if you provide a whitelist in -sf")
	args = parser.parse_args()

	# A bare invocation and an invocation with options but no star file both
	# end here; the latter used to crash with IndexError on args.star[0].
	if not args.star:
		print("usage: " + usage)
		print("Please run '" + progname + " -h' for detailed options.")
		sys.exit(1)
	# get default values
	for key in args_def:
		if getattr(args, key) is None:
			setattr(args, key, args_def[key])
	# preprocess -sf: keep the stripped, non-empty lines of the list file
	lines_sfile = []
	if args.sfile != '0':
		with open(args.sfile) as read_sf:
			for entry in read_sf:
				entry = entry.strip()
				if entry != '':
					lines_sfile.append(entry)
	# get the star file
	star = args.star[0]
	basename = os.path.basename(os.path.splitext(star)[0])
	star_dict = p3s.star_parse(star, 'data_')
	header = star_dict['data_'] + star_dict['loop_']
	with open(star) as read_star:
		# skip the header lines and drop the last line of the file
		lines = read_star.readlines()[len(header):-1]
	header_text = ''.join(header)
	if args.screen != '0':
		# get the sc number (used as the column index of the screened item)
		scn = star_dict['_rln'+args.screen]
		if args.cutoff != '00':
			# converted once, before any output file is created
			cutoff = float(args.cutoff)
			# Name the output files
			screened1 = '{}_screened_{}-gt-{}.star'.format(basename, args.screen, args.cutoff)
			screened2 = '{}_screened_{}-le-{}.star'.format(basename, args.screen, args.cutoff)
			with open(screened1, 'w') as write_screen1, open(screened2, 'w') as write_screen2:
				write_screen1.write(header_text)
				write_screen2.write(header_text)
				for line in lines:
					if float(line.split()[scn]) > cutoff:
						write_screen1.write(line)
					else:
						write_screen2.write(line)
				write_screen1.write(' \n')
				write_screen2.write(' \n')
			print('The screened star files have been written in {} and {}!'.format(screened1, screened2))
		elif args.sfile != '0':
			# exact-match screening on the selected column; a set gives
			# constant-time membership tests
			listed = set(lines_sfile)
			keep_listed = args.white != 0
			with open('{}_screened.star'.format(basename), 'w') as write_screen:
				write_screen.write(header_text)
				for line in lines:
					key = line.split()[scn]
					# whitelist keeps listed keys, blacklist keeps unlisted ones
					if (key in listed) == keep_listed:
						print('Include {}.'.format(key))
						write_screen.write(line)
				write_screen.write(' \n')
	elif args.sfile != '0':
		# keyword screening: a line matches when any keyword occurs in it
		with open('{}_screened.star'.format(basename), 'w') as write_screen:
			write_screen.write(header_text)
			for line in lines:
				# first keyword found in the line, or None
				hit = next((key for key in lines_sfile if key in line), None)
				if args.white == 0:
					if hit is None:
						write_screen.write(line)
					else:
						print('Skip {}.'.format(hit))
				elif hit is not None:
					print('Include {}.'.format(hit))
					write_screen.write(line)
			write_screen.write(' \n')
Ejemplo n.º 6
0
def main():
	"""Write particle coordinates from star files as .star and/or .box files.

	For a star file that has an _rlnImageName column, the particles are
	split by the image-stack name found after the '@' of that column. For
	each stack name a '<name>.star' coordinate file is appended to, and with
	--box also a '<name>.box' file. Coordinates are _rlnCoordinateX/Y minus
	_rlnOriginX/Y when the origin columns exist. For a star file without
	_rlnImageName, and only when --box is given, '<basename>.box' is written
	from the coordinate columns.

	With --edge E, particles whose centre lies outside [E, x-E] or [E, y-E]
	are left out.

	Exits with status 1 after printing the usage text when called without
	any command-line argument.
	"""
	progname = os.path.basename(sys.argv[0])
	usage = progname + (
		" [options] <star files>\n"
		"\tOutput the coordinates from star files. Origin offsets will be considered as integers.\n"
		"\t"
	)

	args_def = {'box':-1, 'edge':-1, 'x':3710, 'y':3838}
	parser = argparse.ArgumentParser()
	parser.add_argument("star", nargs='*', help="specify star files to be processed")
	parser.add_argument("-b", "--box", type=int, help="specify a box size (in pixel) for output, by default {} (output .star only)".format(args_def['box']))
	parser.add_argument("-e", "--edge", type=int, help="specify a distance (in pixel) between box center and micrograph edge, by default {} (don't exclude edge)".format(args_def['edge']))
	parser.add_argument("-x", "--x", type=int, help="provide the x dimension (in pixel) of micrographs, by default {}".format(args_def['x']))
	parser.add_argument("-y", "--y", type=int, help="provide the y dimension (in pixel) of micrographs, by default {}".format(args_def['y']))
	args = parser.parse_args()

	if len(sys.argv) == 1:
		print("usage: " + usage)
		print("Please run '" + progname + " -h' for detailed options.")
		sys.exit(1)
	# get default values
	for key in args_def:
		if getattr(args, key) is None:
			setattr(args, key, args_def[key])

	def too_close_to_edge(x, y):
		# True when edge exclusion is enabled and (x, y) is nearer than
		# args.edge to a border of the args.x by args.y micrograph.
		if args.edge == -1:
			return False
		return not (args.edge <= x <= args.x - args.edge and args.edge <= y <= args.y - args.edge)

	def box_line(x, y):
		# One .box record: the centre shifted by half a box in x and y,
		# followed by the box size twice, tab separated.
		return '{}'.format(x-args.box/2.0) + '\t{}'.format(y-args.box/2.0) + '\t{}'.format(args.box) * 2 + '\n'

	# loop over all input files
	for star in args.star:
		star_dict = p3s.star_parse(star, 'data_')
		header_len = len(star_dict['data_'])+len(star_dict['loop_'])
		# if the star is after the particle extraction step
		if '_rlnImageName' in star_dict:
			with open(star) as s_read:
				# skip the header lines and drop the last line of the file
				rows = [line.split() for line in s_read.readlines()[header_len:-1]]
			name_col = star_dict['_rlnImageName']
			# generate a dict: {out_name: [(ptcl#, line#), ...]}
			out_dict = {}
			for j, row in enumerate(rows):
				num, rlnImageName = row[name_col].split('@')
				# name the output by _rlnImageName (last 5 characters and any further extension removed)
				out_name = os.path.basename(os.path.splitext(rlnImageName[:-5])[0])
				out_dict.setdefault(out_name, []).append((num, j))
			# loop over out_dict, write coords for each key
			for out_name in out_dict:
				star_text = ['\ndata_\n\nloop_ \n_rlnCoordinateX #1 \n_rlnCoordinateY #2 \n']
				box_text = []
				# Sort by particle number (a string), then by line number.
				# This is the order the original obtained by sorting
				# one-entry dicts, which only works on Python 2.
				for num, j in sorted(out_dict[out_name]):
					row = rows[j]
					# get old coord
					x, y = float(row[star_dict['_rlnCoordinateX']]), float(row[star_dict['_rlnCoordinateY']])
					# calculate new coord
					if '_rlnOriginX' in star_dict:
						x -= float(row[star_dict['_rlnOriginX']])
						y -= float(row[star_dict['_rlnOriginY']])
					# exclude the edge
					if too_close_to_edge(x, y):
						continue
					star_text.append('{:>12} '.format(x) + '{:>12} \n'.format(y))
					if args.box != -1:
						box_text.append(box_line(x, y))
				star_text.append('\n')
				# Both files are opened in append mode, as before: an
				# existing file of the same name gets a further block added.
				with open(out_name+'.star', 'a') as o_write:
					o_write.write(''.join(star_text))
				if args.box != -1:
					with open(out_name+'.box', 'a') as o_box_write:
						o_box_write.write(''.join(box_text))
		# else it is before the particle extraction step, so you must want to convert star to box
		elif args.box != -1:
			basename = os.path.basename(os.path.splitext(star)[0])
			with open(star) as s_read:
				lines = s_read.readlines()[header_len:-1]
			with open(basename+'.box', 'w') as o_box_write:
				for line in lines:
					row = line.split()
					# get old coord
					x, y = float(row[star_dict['_rlnCoordinateX']]), float(row[star_dict['_rlnCoordinateY']])
					# exclude the edge
					if too_close_to_edge(x, y):
						continue
					o_box_write.write(box_line(x, y))