def main():
    """Command-line entry point: match groups across several partitions.

    Every positional argument names a stack of averages.  Each average is
    expected to carry a 'members' attribute (a sequence of particle ids); the
    sorted member arrays of one stack form one partition.  All partitions are
    passed to ``k_means_stab_bbenum`` and, for every match it returns, one
    line is printed with the matched group numbers, the size of each group
    and the matched size (``CT_s``).  A summary of the number of averages and
    particles per stack follows.

    Reads ``sys.argv`` and writes to standard output; returns None.
    ``global_def.BATCH`` is set to True for the duration of the run and reset
    to False on normal completion.
    """
    progname = os.path.basename(sys.argv[0])
    # The usage line lists the options that are actually defined below.
    usage = progname + " averages1 averages2 [--T=threshold] [--J=J] [--max_branching=n] [--verbose] [--timing]"
    parser = OptionParser(usage, version=SPARXVERSION)
    parser.add_option("--T", type="int", default=0, help=" Threshold for matching")
    parser.add_option("--J", type="int", default=50, help=" J")
    parser.add_option("--max_branching", type="int", default=40, help=" maximum branching")
    parser.add_option("--verbose", action="store_true", default=False, help=" Print the raw matching results and the members of each matched group")
    parser.add_option("--timing", action="store_true", default=False, help=" Get the timing")
    (options, args) = parser.parse_args()

    if global_def.CACHE_DISABLE:
        from utilities import disable_bdb_cache
        disable_bdb_cache()

    global_def.BATCH = True

    from numpy import array
    from statistics import k_means_stab_bbenum

    R = len(args)
    Parts = []   # Parts[r][k]: sorted int32 array with the members of group k in stack r
    mem = []     # mem[r]: total number of members over all groups of stack r
    avg = []     # avg[r]: number of averages (groups) in stack r
    for stack_name in args:
        images = EMData.read_images(stack_name)
        avg.append(len(images))
        n_members = 0
        part = []
        for image in images:
            lid = image.get_attr('members')
            n_members += len(lid)
            lid = array(lid, 'int32')
            lid.sort()
            part.append(lid.copy())
        mem.append(n_members)
        Parts.append(part)

    if options.timing:
        from time import time
        time1 = time()

    MATCH, STB_PART, CT_s, CT_t, ST, st = k_means_stab_bbenum(Parts, T=options.T, J=options.J, max_branching=options.max_branching, stmult=0.1, branchfunc=2)

    if options.verbose:
        for item in (MATCH, STB_PART, CT_s, CT_t, ST, st, " "):
            print(item)

    # Each report line is assembled first and printed with a single-argument
    # print call, so the output is the same with or without the print function.
    for match in MATCH:
        u = match[0]  # u is the group in question in partition 1
        assert len(STB_PART[u]) == CT_s[u]
        fields = ["Group "]
        fields.extend("%3d " % (match[r]) for r in range(R))
        fields.append(" matches: group size = ")
        fields.extend(" %3d" % len(Parts[r][match[r]]) for r in range(R))
        fields.append(" matched size = %3d" % (CT_s[u]))
        if options.verbose:
            # One-element tuple: the group is formatted as a whole even if it is a tuple.
            fields.append(" matched group = %s" % (STB_PART[u],))
        else:
            fields.append("")
        print(" ".join(fields))

    summary = ["\nNumber of averages = "]
    summary.extend("%3d" % (n) for n in avg)
    summary.append("\nTotal number of particles = ")
    summary.extend("%3d" % (n) for n in mem)
    summary.append(" number of matched particles = %5d" % (sum(CT_s)))
    print(" ".join(summary))

    if options.timing:
        print("%s %s" % ("Elapsed time = ", time() - time1))

    global_def.BATCH = False
def multi_align_stability_new(ali_params, mir_stab_thld=0.0, grp_err_thld=10000.0, err_thld=1.732, print_individual=False, d=64):
    """Select the particles whose 2D alignment is stable across several runs.

    Parameters:
        ali_params: list with one flat sequence per alignment run; each
            particle occupies four consecutive entries that are passed to
            ``combine_params2`` as (alpha, sx, sy, mirror), alpha in degrees.
            NOTE: the mirror entries are converted to int IN PLACE.
        mir_stab_thld: if the fraction of mirror-stable particles is not
            larger than this value the function returns early.
        grp_err_thld: not used by this function; kept so the signature
            matches ``multi_align_stability``.
        err_thld: pixel-error threshold.  Particles above 3*err_thld in the
            first pass are treated as outliers; particles below err_thld in
            the second pass form the stable set.
        print_individual: if true, print one line per particle.
        d: scales the angular part of the squared pixel error as d*d/4
            (presumably the particle diameter in pixels -- confirm with callers).

    Returns a tuple (stable_set, mir_stab_rate, value):
        * ([], mir_stab_rate, -1.0) if mir_stab_rate <= mir_stab_thld;
        * ([], mir_stab_rate, root mean squared pixel error) if at most one
          particle survives the outlier purge;
        * otherwise stable_set is a list of
          [pixel error, particle index, [alpha, tx, ty, mirror]] and value is
          the sum of the stable pixel errors divided by the number of
          particles that survived the purge.
    """
    from math import pi, sin, cos, sqrt
    from numpy import array
    from statistics import k_means_stab_bbenum
    from utilities import combine_params2

    def sqerr(a):
        # Mean squared deviation from the mean, computed as (sum(x^2) - sum(x)^2/n)/n.
        n = len(a)
        total = sum(a)
        sq = sum((x**2 for x in a), 0.0)
        return (sq - total*total/n)/n

    # args - G, data - [T, d]
    def func(args, data, return_avg_pixel_error=True):
        # Computes the squared pixel error per particle given the
        # transformation parameters (G_l) of every run but the last.
        params = data[0]
        diam = data[1]
        L = len(params)
        N = len(params[0])//4

        # The last run is the reference, so its three parameters stay 0.
        args_list = [0.0]*(L*3)
        for i in range(L*3 - 3):
            args_list[i] = args[i]

        pt = Transform({"type": "2D"})
        sqr_pixel_error = [0.0]*N
        ave_params = []
        for i in range(N):
            sum_cosa = 0.0
            sum_sina = 0.0
            sx = [0.0]*L
            sy = [0.0]*L
            for l in range(L):
                alpha, sx[l], sy[l], _mirror = combine_params2(
                    params[l][i*4+0], params[l][i*4+1], params[l][i*4+2], int(params[l][i*4+3]),
                    args_list[l*3+0], args_list[l*3+1], args_list[l*3+2], 0)
                sum_cosa += cos(alpha*pi/180.0)
                sum_sina += sin(alpha*pi/180.0)
            # NOTE(review): sqrtP is 0 when the unit vectors cancel exactly;
            # the normalisation below then raises ZeroDivisionError.
            sqrtP = sqrt(sum_cosa**2 + sum_sina**2)
            sum_cosa /= sqrtP
            sum_sina /= sqrtP
            # This completes calculation of matrix H_i

            # Rounding can push the sum marginally below zero, which would
            # make the callers' sqrt() fail, hence the clamp.  4.0 keeps the
            # division exact for odd integer d.  The angular term is
            # algebraically the same as summing the squared deviations of the
            # unit vectors from their normalised mean.
            sqr_pixel_error[i] = max(0.0, diam*diam/4.0*(1.0 - sqrtP/L) + sqerr(sx) + sqerr(sy))

            # Get ave transform params
            pt.set_matrix([sum_cosa, sum_sina, 0.0, sum(sx)/L, -sum_sina, sum_cosa, 0.0, sum(sy)/L, 0.0, 0.0, 1.0, 0.0])
            dd = pt.get_params("2D")
            # We are using here mirror of the FIRST SET.
            pt = Transform({"type": "2D", "alpha": dd["alpha"], "tx": dd["tx"], "ty": dd["ty"], "mirror": int(params[0][i*4+3]), "scale": 1.0})
            dd = pt.get_params("2D")
            ave_params.append([dd["alpha"], dd["tx"], dd["ty"], dd["mirror"]])

        # Warning: Whatever I return here is squared pixel error, this is for the easy expression of derivative
        # Don't forget to square root it after getting the value
        if return_avg_pixel_error:
            return sum(sqr_pixel_error)/N
        return sqr_pixel_error, ave_params

    def params_against_last(params):
        # Alignment parameters (alpha, sx, sy) of every run relative to the last run.
        relative = []
        for i in range(num_ali - 1):
            alpha, sx, sy, mirror = align_diff_params(params[i], params[num_ali - 1])
            relative.extend([alpha, sx, sy])
        return relative

    #### MAIN BODY ####

    # Find out the subset which is mirror stable over all runs
    num_ali = len(ali_params)
    nima = len(ali_params[0])//4
    all_part = []
    for i in range(num_ali):
        mirror0 = []
        mirror1 = []
        for j in range(nima):
            ali_params[i][j*4+3] = int(ali_params[i][j*4+3])
            if ali_params[i][j*4+3] == 0:
                mirror0.append(j)
            else:
                mirror1.append(j)
        all_part.append([array(mirror0, 'int32'), array(mirror1, 'int32')])

    stab_part = k_means_stab_bbenum(all_part, T=0, nguesses=1)[1]
    mir_stab_part = stab_part[0] + stab_part[1]

    mir_stab_rate = len(mir_stab_part)/float(nima)
    if mir_stab_rate <= mir_stab_thld:
        return [], mir_stab_rate, -1.0
    mir_stab_part.sort()
    del all_part, stab_part

    # Keep the alignment parameters of mirror stable particles
    ali_params_mir_stab = [[] for _ in range(num_ali)]
    for j in mir_stab_part:
        for i in range(num_ali):
            ali_params_mir_stab[i].extend(ali_params[i][j*4:j*4+4])
    nima2 = len(mir_stab_part)

    # Compute alignment parameters for the first num_ali-1 sets against the last one
    args = params_against_last(ali_params_mir_stab)

    # Do an initial analysis, purge all outlier particles, whose pixel error are larger than three times of threshold
    data = [ali_params_mir_stab, d]
    pixel_error, ave_params = func(array(args), data, return_avg_pixel_error=False)

    ali_params_cleaned = [[] for _ in range(num_ali)]
    cleaned_part = []
    for j in range(nima2):
        if sqrt(pixel_error[j]) <= 3*err_thld:
            cleaned_part.append(j)
            for i in range(num_ali):
                ali_params_cleaned[i].extend(ali_params_mir_stab[i][j*4:j*4+4])
    nima3 = len(cleaned_part)
    if nima3 <= 1:
        return [], mir_stab_rate, sqrt(sum(pixel_error)/nima2)

    # Compute alignment parameters AGAIN after exclusion of outliers
    args = params_against_last(ali_params_cleaned)

    # repeat the analysis for all particles, for outliers the error is supposed to increase
    pixel_error, ave_params = func(array(args), data, return_avg_pixel_error=False)

    # Position of every mirror-stable particle inside mir_stab_part; replaces
    # the repeated "in" / index() scans (first occurrence wins, like index()).
    position = {}
    for k, particle in enumerate(mir_stab_part):
        position.setdefault(particle, k)

    stable_set = []
    val = 0.0
    for i in range(nima):
        j = position.get(i)
        if j is None:
            if print_individual:
                print("Particle %4d : Mirror unstable" % i)
            continue
        err = sqrt(pixel_error[j])
        if err < err_thld:
            stable_set.append([err, mir_stab_part[j], ave_params[j]])
            val += err
            if print_individual:
                print("Particle %4d : pixel error = %18.4f" % (i, err))
        elif print_individual:
            print("Particle %4d : pixel error = %18.4f unstable" % (i, err))

    # NOTE(review): val sums the errors of the stable particles only but is
    # divided by the number of particles kept after the purge -- confirm intent.
    return stable_set, mir_stab_rate, val/len(cleaned_part)
def main():
    """Command-line entry point: match groups across several partitions.

    Every positional argument names a stack of averages.  Each average is
    expected to carry a 'members' attribute (a sequence of particle ids); the
    sorted member arrays of one stack form one partition.  All partitions are
    passed to ``k_means_stab_bbenum`` and, for every match it returns, one
    line is printed with the matched group numbers, the size of each group
    and the matched size (``CT_s``).  A summary of the number of averages and
    particles per stack follows.

    Reads ``sys.argv`` and writes to standard output; returns None.
    ``global_def.BATCH`` is set to True for the duration of the run and reset
    to False on normal completion.
    """
    progname = os.path.basename(sys.argv[0])
    # The usage line lists the options that are actually defined below.
    usage = progname + " averages1 averages2 [--T=threshold] [--J=J] [--max_branching=n] [--verbose] [--timing]"
    parser = OptionParser(usage, version=SPARXVERSION)
    parser.add_option("--T", type="int", default=0, help=" Threshold for matching")
    parser.add_option("--J", type="int", default=50, help=" J")
    parser.add_option("--max_branching", type="int", default=40, help=" maximum branching")
    parser.add_option("--verbose", action="store_true", default=False, help=" Print the raw matching results and the members of each matched group")
    parser.add_option("--timing", action="store_true", default=False, help=" Get the timing")
    (options, args) = parser.parse_args()

    if global_def.CACHE_DISABLE:
        from utilities import disable_bdb_cache
        disable_bdb_cache()

    global_def.BATCH = True

    from numpy import array
    from statistics import k_means_stab_bbenum

    R = len(args)
    Parts = []   # Parts[r][k]: sorted int32 array with the members of group k in stack r
    mem = []     # mem[r]: total number of members over all groups of stack r
    avg = []     # avg[r]: number of averages (groups) in stack r
    for stack_name in args:
        images = EMData.read_images(stack_name)
        avg.append(len(images))
        part = []
        for image in images:
            lid = array(image.get_attr('members'), 'int32')
            lid.sort()
            part.append(lid.copy())
        mem.append(sum(len(lid) for lid in part))
        Parts.append(part)

    if options.timing:
        from time import time
        time1 = time()

    MATCH, STB_PART, CT_s, CT_t, ST, st = k_means_stab_bbenum(Parts, T=options.T, J=options.J, max_branching=options.max_branching, stmult=0.1, branchfunc=2)

    if options.verbose:
        print(MATCH)
        print(STB_PART)
        print(CT_s)
        print(CT_t)
        print(ST)
        print(st)
        print(" ")

    # print(*fields) separates the fields by one space and ends the line,
    # which is what the chain of print(..., end=' ') calls produced.
    for match in MATCH:
        u = match[0]  # u is the group in question in partition 1
        assert len(STB_PART[u]) == CT_s[u]
        fields = ["Group "]
        fields += ["%3d " % (match[r]) for r in range(R)]
        fields.append(" matches: group size = ")
        fields += [" %3d" % len(Parts[r][match[r]]) for r in range(R)]
        fields.append(" matched size = %3d" % (CT_s[u]))
        # One-element tuple: the group is formatted as a whole even if it is a tuple.
        fields.append(" matched group = %s" % (STB_PART[u],) if options.verbose else "")
        print(*fields)

    summary = ["\nNumber of averages = "]
    summary += ["%3d" % (n) for n in avg]
    summary.append("\nTotal number of particles = ")
    summary += ["%3d" % (n) for n in mem]
    summary.append(" number of matched particles = %5d" % (sum(CT_s)))
    print(*summary)

    if options.timing:
        print("Elapsed time = ", time() - time1)

    global_def.BATCH = False
def multi_align_stability(ali_params, mir_stab_thld=0.0, grp_err_thld=10000.0, err_thld=1.732, print_individual=False, d=64):
    """Select the particles whose 2D alignment is stable across several runs.

    Parameters:
        ali_params: list with one flat sequence per alignment run; each
            particle occupies four consecutive entries, used below as
            (alpha, sx, sy, mirror) with alpha in degrees.
        mir_stab_thld: if the fraction of mirror-stable particles is not
            larger than this value the function returns early.
        grp_err_thld: if the square root of the minimised error returned by
            ``Util.multi_align_error`` exceeds this value, no stable set is
            returned.
        err_thld: pixel-error threshold.  Particles above 3*err_thld before
            the minimisation are treated as outliers; particles below
            err_thld after it form the stable set.
        print_individual: if true, print one line per particle.
        d: scales the angular part of the squared pixel error as d*d/4
            (presumably the particle diameter in pixels -- confirm with callers).

    Returns a tuple (stable_set, mir_stab_rate, value):
        * ([], mir_stab_rate, -1.0) if mir_stab_rate <= mir_stab_thld;
        * ([], mir_stab_rate, root mean squared pixel error) if at most one
          particle survives the outlier purge;
        * ([], mir_stab_rate, sqrt(val)) if sqrt(val) > grp_err_thld;
        * otherwise stable_set is a list of [pixel error, particle index]
          sorted in ascending order and value is sqrt(val), where val is the
          last element returned by ``Util.multi_align_error``.
    """
    from math import pi, sin, cos, sqrt
    from numpy import array
    from statistics import k_means_stab_bbenum

    def ave(a):
        # Arithmetic mean (float accumulation).
        return sum(a, 0.0)/len(a)

    def var(a):
        # Mean squared deviation from the mean.
        mean = ave(a)
        return sum(((x - mean)**2 for x in a), 0.0)/len(a)

    # args - G, data - [T, d]
    def func(args, data, return_avg_pixel_error=True):
        # Squared pixel error per particle for the given per-run parameters.
        params = data[0]
        diam = data[1]
        L = len(params)
        N = len(params[0])//4

        # The last run is the reference, so its three parameters stay 0.
        args_list = [0.0]*(L*3)
        for i in range(L*3 - 3):
            args_list[i] = args[i]

        cosa = [cos(args_list[l*3]*pi/180.0) for l in range(L)]
        sina = [sin(args_list[l*3]*pi/180.0) for l in range(L)]

        sqr_pixel_error = [0.0]*N
        for i in range(N):
            sum_cosa = 0.0
            sum_sina = 0.0
            sx = [0.0]*L
            sy = [0.0]*L
            for j in range(L):
                alpha_j = params[j][i*4]
                x_j = params[j][i*4+1]
                y_j = params[j][i*4+2]
                if int(params[j][i*4+3]) == 0:
                    angle = (args_list[j*3] + alpha_j)*pi/180.0
                    sx[j] = args_list[j*3+1] + x_j*cosa[j] - y_j*sina[j]
                    sy[j] = args_list[j*3+2] + x_j*sina[j] + y_j*cosa[j]
                else:
                    # Mirrored particle: rotation and x shift change sign.
                    angle = (-args_list[j*3] + alpha_j)*pi/180.0
                    sx[j] = -args_list[j*3+1] + x_j*cosa[j] + y_j*sina[j]
                    sy[j] = args_list[j*3+2] - x_j*sina[j] + y_j*cosa[j]
                sum_cosa += cos(angle)
                sum_sina += sin(angle)
            P = sqrt(sum_cosa**2 + sum_sina**2)
            # Rounding can push the value marginally below zero (see the
            # val < 0.0 case below); clamp so the callers' sqrt() cannot fail.
            sqr_pixel_error[i] = max(0.0, diam*diam/4.*(1 - P/L) + var(sx) + var(sy))

        # Warning: Whatever I return here is squared pixel error, this is for the easy expression of derivative
        # Don't forget to square root it after getting the value
        if return_avg_pixel_error:
            return sum(sqr_pixel_error)/N
        return sqr_pixel_error

    # Gradient of func, kept for reference (it belongs to the scipy
    # fmin_l_bfgs_b code path that was replaced by the C++ implementation).
    #
    # def dfunc(args, data):
    #     from math import pi, sin, cos
    #     from numpy import zeros, array, float64
    #
    #     g = zeros(args.shape, float64)
    #
    #     ali_params = data[0]
    #     d = data[1]
    #
    #     L = len(ali_params)
    #     N = len(ali_params[0])/4
    #
    #     args_list= [0.0]*(L*3)
    #     for i in xrange(L*3-3): args_list[i] = args[i]
    #
    #     cosa = [0.0]*L
    #     sina = [0.0]*L
    #     for i in xrange(L):
    #         cosa[i] = cos(args_list[i*3]*pi/180.0)
    #         sina[i] = sin(args_list[i*3]*pi/180.0)
    #
    #     for i in xrange(N):
    #         sum_cosa = 0.0
    #         sum_sina = 0.0
    #         sx = [0.0]*L
    #         sy = [0.0]*L
    #         for j in xrange(L):
    #             if int(ali_params[j][i*4+3]) == 0:
    #                 sum_cosa += cos((args_list[j*3]+ali_params[j][i*4])*pi/180.0)
    #                 sum_sina += sin((args_list[j*3]+ali_params[j][i*4])*pi/180.0)
    #                 sx[j] = args_list[j*3+1]+ali_params[j][i*4+1]*cosa[j]-ali_params[j][i*4+2]*sina[j]
    #                 sy[j] = args_list[j*3+2]+ali_params[j][i*4+1]*sina[j]+ali_params[j][i*4+2]*cosa[j]
    #             else:
    #                 sum_cosa += cos((-args_list[j*3]+ali_params[j][i*4])*pi/180.0)
    #                 sum_sina += sin((-args_list[j*3]+ali_params[j][i*4])*pi/180.0)
    #                 sx[j] = -args_list[j*3+1]+ali_params[j][i*4+1]*cosa[j]+ali_params[j][i*4+2]*sina[j]
    #                 sy[j] = args_list[j*3+2]-ali_params[j][i*4+1]*sina[j]+ali_params[j][i*4+2]*cosa[j]
    #         P = sqrt(sum_cosa**2+sum_sina**2)
    #         sum_cosa /= P
    #         sum_sina /= P
    #
    #         for j in xrange(L-1):
    #             # Original formula, useful for double-checking, DON'T DELETE!
    #             #g[j*3] += d*d/4.0*(-1.0)*0.5/P*(-2*sum_cosa*P*sin((args_list[j*3]+ali_params[j][i*4])*pi/180.0)+\
    #             #                      2*sum_sina*P*cos((args_list[j*3]+ali_params[j][i*4])*pi/180.0))*pi/180.0+\
    #             #      2.0*(sx[j]-ave(sx))*(-ali_params[j][i*4+1]*sin(args_list[j*3]*pi/180.0)-ali_params[j][i*4+2]*cos(args_list[j*3]*pi/180.0))*pi/180.0+\
    #             #      2.0*(sy[j]-ave(sy))*( ali_params[j][i*4+1]*cos(args_list[j*3]*pi/180.0)-ali_params[j][i*4+2]*sin(args_list[j*3]*pi/180.0))*pi/180.0
    #             dx = 2.0*(sx[j]-ave(sx))
    #             dy = 2.0*(sy[j]-ave(sy))
    #             if int(ali_params[j][i*4+3]) == 0:
    #                 g[j*3] += (d*d/4.0*(sum_cosa*sin((args_list[j*3]+ali_params[j][i*4])*pi/180.0)-\
    #                                     sum_sina*cos((args_list[j*3]+ali_params[j][i*4])*pi/180.0))+\
    #                            dx*(-ali_params[j][i*4+1]*sina[j]-ali_params[j][i*4+2]*cosa[j])+\
    #                            dy*( ali_params[j][i*4+1]*cosa[j]-ali_params[j][i*4+2]*sina[j]))*pi/180.0
    #                 g[j*3+1] += dx
    #                 g[j*3+2] += dy
    #             else:
    #                 g[j*3] += (d*d/4.0*(-sum_cosa*sin((-args_list[j*3]+ali_params[j][i*4])*pi/180.0)+\
    #                                      sum_sina*cos((-args_list[j*3]+ali_params[j][i*4])*pi/180.0))+\
    #                            dx*(-ali_params[j][i*4+1]*sina[j]+ali_params[j][i*4+2]*cosa[j])+\
    #                            dy*(-ali_params[j][i*4+1]*cosa[j]-ali_params[j][i*4+2]*sina[j]))*pi/180.0
    #                 g[j*3+1] += -dx
    #                 g[j*3+2] += dy
    #     g /= (N*L)
    #     return g

    # I decided not to use scipy in order to reduce the dependency, I wrote the C++ code instead
    # from scipy import array, int32
    # from scipy.optimize.lbfgsb import fmin_l_bfgs_b

    # Find out the subset which is mirror stable over all runs
    num_ali = len(ali_params)
    nima = len(ali_params[0])//4
    all_part = []
    for i in range(num_ali):
        mirror0 = []
        mirror1 = []
        for j in range(nima):
            if ali_params[i][j*4+3] == 0:
                mirror0.append(j)
            else:
                mirror1.append(j)
        all_part.append([array(mirror0, 'int32'), array(mirror1, 'int32')])

    stab_part = k_means_stab_bbenum(all_part, T=0, nguesses=1)[1]
    mir_stab_part = stab_part[0] + stab_part[1]

    mir_stab_rate = len(mir_stab_part)/float(nima)
    if mir_stab_rate <= mir_stab_thld:
        return [], mir_stab_rate, -1.0
    mir_stab_part.sort()
    del all_part, stab_part

    # Keep the alignment parameters of mirror stable particles
    ali_params_mir_stab = [[] for _ in range(num_ali)]
    for j in mir_stab_part:
        for i in range(num_ali):
            ali_params_mir_stab[i].extend(ali_params[i][j*4:j*4+4])
    nima2 = len(mir_stab_part)

    # Find out the alignment parameters for each iteration against the last one
    args = []
    for i in range(num_ali - 1):
        alpha, sx, sy, mirror = align_diff_params(ali_params_mir_stab[i], ali_params_mir_stab[num_ali - 1])
        args.extend([alpha, sx, sy])

    # Do an initial analysis, purge all outlier particles, whose pixel error are larger than three times of threshold
    data = [ali_params_mir_stab, d]
    pixel_error_before = func(array(args), data, return_avg_pixel_error=False)

    ali_params_cleaned = [[] for _ in range(num_ali)]
    cleaned_part = []
    for j in range(nima2):
        if sqrt(pixel_error_before[j]) > 3*err_thld:
            continue
        cleaned_part.append(j)
        for i in range(num_ali):
            ali_params_cleaned[i].extend(ali_params_mir_stab[i][j*4:j*4+4])
    nima3 = len(cleaned_part)
    if nima3 <= 1:
        return [], mir_stab_rate, sqrt(sum(pixel_error_before)/nima2)

    # Use LBFGSB to minimize the sum of pixel errors
    data = [ali_params_cleaned, d]
    # Use Python code
    #ps_lp, val, d = fmin_l_bfgs_b(func, array(args), args=[data], fprime=dfunc, bounds=None, m=10, factr=1e3, pgtol=1e-4, iprint=-1, maxfun=100)
    # Use C++ code
    ali_params_cleaned_list = []
    for params in ali_params_cleaned:
        ali_params_cleaned_list.extend(params)
    results = Util.multi_align_error(args, ali_params_cleaned_list, d)
    ps_lp = results[:-1]
    val = results[-1]
    if val < 0.0:
        # This will happen in some rare cases, it should be due to rounding errors,
        # because all results show the val is about 1e-13.
        val = 0.0
    del ali_params_cleaned_list

    if sqrt(val) > grp_err_thld:
        return [], mir_stab_rate, sqrt(val)

    pixel_error_after = func(ps_lp, data, return_avg_pixel_error=False)

    if print_individual:
        # Lookup tables replace the repeated "in" / index() list scans
        # (first occurrence wins, like index()).
        stable_pos = {}
        for k, particle in enumerate(mir_stab_part):
            stable_pos.setdefault(particle, k)
        cleaned_pos = {}
        for k, j in enumerate(cleaned_part):
            cleaned_pos.setdefault(j, k)

        for i in range(nima):
            j = stable_pos.get(i)
            if j is None:
                print("Particle %4d : Mirror unstable \n" % i)
            elif j in cleaned_pos:
                print("Particle %4d : pixel error = %8.4f \n" % (i, sqrt(pixel_error_after[cleaned_pos[j]])))
            else:
                print("Particle %4d : pixel error = %8.4f outlier \n" % (i, sqrt(pixel_error_before[j])))

    stable_set = []
    for i in range(nima3):
        err = sqrt(pixel_error_after[i])
        if err < err_thld:
            stable_set.append([err, mir_stab_part[cleaned_part[i]]])
    stable_set.sort()

    return stable_set, mir_stab_rate, sqrt(val)