def get_bdb_list(path):
    """Given a bdb:* path, return a list of the image stacks it refers to.

    If *path* names a specific dictionary ("bdb:dir#name"), the result is
    either [path] (when that dictionary exists) or []. Otherwise one
    "path#dict" entry is returned for every dictionary found at the path.
    """
    available = db_list_dicts(path)
    if "#" not in path:
        # No specific dictionary requested: expand to every dict at this path.
        return ["%s#%s" % (path, name) for name in available]
    # A single dictionary was named explicitly; keep it only if it exists.
    target = path.rsplit("#", 1)[-1]
    return [path] if target in available else []
def main():
    """Command-line entry point (e2procmulti-style).

    Parses e2proc2d-like options and applies the requested sequence of 2-D
    operations to every image of every input stack, writing results in place
    unless --postfix derives a separate output name.

    Fixes applied to the original:
      * undefined name ``true`` -> ``True`` in the --selfcl branch (NameError);
      * undefined name ``n0`` in the --apix branch silently disabled the CTF
        apix update via the bare except -> compare against image 0;
      * int ``sclmd`` was concatenated to a str in the invalid-mode error
        (TypeError) -> wrapped in str();
      * the autoinvert skip decision was gated on options.verbose, so with
        verbose=0 every stack was skipped -> skip only non-inverted stacks.
    """
    progname = os.path.basename(sys.argv[0])
    usage = progname + """ [options] <inputfiles> A generic 2D image processing program which acts on multiple input stacks. Default operation is similar to e2proc2d.py --inplace, but operating on many files. """
    parser = EMArgumentParser(usage=usage, version=EMANVERSION)
    parser.add_argument("--postfix", type=str, help="Adds this string to each input filename to produce output filename (avoid overwriting)", default=None)
    parser.add_argument("--allparticles", action="store_true", help="Will process all particle sets stored in BDB in the particles subdirectory", default=False)
    parser.add_argument("--apix", type=float, help="A/pixel for S scaling")
    parser.add_argument("--clip", metavar="xsize,ysize", type=str, action="append", help="Specify the output size in pixels xsize,ysize[,xcenter,ycenter], images can be made larger or smaller.")
    parser.add_argument("--process", metavar="processor_name:param1=value1:param2=value2", type=str, action="append", help="apply a processor named 'processorname' with all its parameters/values.")
    parser.add_argument("--autoinvert", action="store_true", help="Automatically decides whether to invert each stack of images to make particles white (EMAN2 convention). Decision is made for an entire stack. Non-inverted images will NOT BET PROCESSED AT ALL !", default=False)
    parser.add_argument("--mult", metavar="k", type=float, help="Multiply image by a constant. mult=-1 to invert contrast.")
    parser.add_argument("--meanshrink", metavar="n", type=int, action="append", help="Reduce an image size by an integral scaling factor using average. Clip is not required.")
    parser.add_argument("--medianshrink", metavar="n", type=int, action="append", help="Reduce an image size by an integral scaling factor, uses median filter. Clip is not required.")
    parser.add_argument("--multfile", type=str, action="append", help="Multiplies the volume by another volume of identical size. This can be used to apply masks, etc.")
    parser.add_argument("--norefs", action="store_true", help="Skip any input images which are marked as references (usually used with classes.*)")
    parser.add_argument("--radon", action="store_true", help="Do Radon transform")
    parser.add_argument("--randomize", type=str, action="append", help="Randomly rotate/translate the image. Specify: da,dxy,flip da is a uniform distribution over +-da degrees, dxy is a uniform distribution on x/y, if flip is 1, random handedness changes will occur")
    parser.add_argument("--rotate", type=float, action="append", help="Rotate clockwise (in degrees)")
    parser.add_argument("--fp", type=int, help="This generates rotational/translational 'footprints' for each input particle, the number indicates which algorithm to use (0-6)")
    parser.add_argument("--scale", metavar="f", type=float, action="append", help="Scale by specified scaling factor. Clip must also be specified to change the dimensions of the output map.")
    parser.add_argument("--selfcl", metavar="steps mode", type=int, nargs=2, help="Output file will be a 180x180 self-common lines map for each image.")
    parser.add_argument("--translate", type=str, action="append", help="Translate by x,y pixels")
    parser.add_argument("--verbose", "-v", dest="verbose", action="store", metavar="n", type=int, help="verbose level [0-9], higner number means higher level of verboseness", default=0)
    parser.add_argument("--ppid", type=int, help="Set the PID of the parent process, used for cross platform PPID", default=-1)
    parser.add_argument("--writejunk", action="store_true", help="Writes the image even if its sigma is 0.", default=False)

    # Options that may be supplied several times; index_d tracks how many
    # occurrences of each have been consumed for the current image.
    append_options = ["clip", "process", "meanshrink", "medianshrink", "scale", "randomize", "rotate", "translate", "multfile"]

    # Operations are applied in the order they appear on the command line.
    optionlist = pyemtbx.options.get_optionlist(sys.argv[1:])
    (options, args) = parser.parse_args()

    if options.allparticles:
        args = ["bdb:particles#" + i for i in db_list_dicts("bdb:particles")]
        args.sort()
        if options.verbose:
            print("%d particle files identified" % len(args))

    if len(args) < 1:
        print("Please specify at least one input file to operate on")
        sys.exit(1)

    logid = E2init(sys.argv, options.ppid)

    for infile in args:
        # Derive the output name; without --postfix, files are modified in place.
        if options.postfix != None:
            if infile[:4].lower() == "bdb:":
                outfile = infile + options.postfix
            elif "." in infile:
                outfile = infile.rsplit(".", 1)[0] + options.postfix + "." + infile.rsplit(".", 1)[1]
            else:
                outfile = infile + options.postfix
        else:
            outfile = infile

        if options.verbose > 2:
            Log.logger().set_level(options.verbose - 2)

        d = EMData()
        nimg = EMUtil.get_image_count(infile)

        if options.autoinvert:
            # Compare the mean inside a central disk against the mean of the
            # surrounding annulus over the whole stack; per EMAN2 convention
            # particles should be brighter than the background.
            sumin, sumout = 0, 0
            suminsig = 0
            for i in range(nimg):
                d.read_image(infile, i)
                d1 = d.process("mask.sharp", {"outer_radius": old_div(d["nx"] * 2, 7)})
                d2 = d.process("mask.sharp", {"inner_radius": old_div(d["nx"] * 2, 7), "outer_radius": old_div(d["nx"], 2) - 2})
                sumin += d1["mean"]
                sumout += d2["mean"]
                suminsig += d1["sigma"]
            doinvert = sumin < sumout
            # BUGFIX: this decision was previously entangled with
            # options.verbose, skipping every stack at verbose=0.
            if not doinvert:
                continue
            if options.verbose:
                print("Inverting ", infile)

        if options.verbose:
            print("%s : processing %d images" % (infile, nimg))

        lasttime = time.time()
        for i in range(nimg):
            if options.verbose >= 1:
                # Throttled progress display (at most every 3 s unless very verbose).
                if time.time() - lasttime > 3 or options.verbose > 2:
                    sys.stdout.write(" %7d\r" % i)
                    sys.stdout.flush()
                    lasttime = time.time()
            d.read_image(infile, i)
            sigma = d.get_attr("sigma").__float__()
            if sigma == 0:
                print("Warning: sigma = 0 for image ", i)
                if options.writejunk == False:
                    print("Use the writejunk option to force writing this image to disk")
                    continue

            index_d = {}
            for append_option in append_options:
                index_d[append_option] = 0

            for option1 in optionlist:
                nx = d.get_xsize()
                ny = d.get_ysize()

                if option1 == "apix":
                    apix = options.apix
                    d.set_attr('apix_x', apix)
                    d.set_attr('apix_y', apix)
                    d.set_attr('apix_z', apix)
                    try:
                        # BUGFIX: the warning test used the undefined name
                        # "n0"; the resulting NameError made this whole try a
                        # silent no-op, so the CTF apix was never updated.
                        # Warn once, on the first image of the stack.
                        if i == 0 and d["ctf"].apix != apix:
                            print("Warning: A/pix value in CTF was %1.2f, changing to %1.2f. May impact CTF parameters." % (d["ctf"].apix, apix))
                        d["ctf"].apix = apix
                    except:
                        pass  # no CTF on this image

                if option1 == "process":
                    fi = index_d[option1]
                    (processorname, param_dict) = parsemodopt(options.process[fi])
                    if not param_dict:
                        param_dict = {}
                    d.process_inplace(processorname, param_dict)
                    index_d[option1] += 1

                elif option1 == "mult":
                    d.mult(options.mult)

                elif option1 == "autoinvert" and doinvert:
                    d.mult(-1.0)
                    d["autoinvert"] = True

                elif option1 == "multfile":
                    mf = EMData(options.multfile[index_d[option1]], 0)
                    d.mult(mf)
                    mf = None
                    index_d[option1] += 1

                elif option1 == "norefs" and d["ptcl_repr"] <= 0:
                    continue

                elif option1 == "setsfpairs":
                    # Match the radial power spectrum of each odd image to
                    # that of the preceding even image.
                    dataf = d.do_fft()
                    x0 = 0
                    step = 0.5
                    if i % 2 == 0:
                        sfcurve1 = dataf.calc_radial_dist(nx, x0, step)
                    else:
                        sfcurve2 = dataf.calc_radial_dist(nx, x0, step)
                        for j in range(nx):
                            if sfcurve1[j] > 0 and sfcurve2[j] > 0:
                                sfcurve2[j] = sqrt(old_div(sfcurve1[j], sfcurve2[j]))
                            else:
                                sfcurve2[j] = 0
                        dataf.apply_radial_func(x0, step, sfcurve2)
                        d = dataf.do_ift()

                elif option1 == "fp":
                    d = d.make_footprint(options.fp)

                elif option1 == "scale":
                    scale_f = options.scale[index_d[option1]]
                    if scale_f != 1.0:
                        d.scale(scale_f)
                    index_d[option1] += 1

                elif option1 == "rotate":
                    rotatef = options.rotate[index_d[option1]]
                    if rotatef != 0.0:
                        d.rotate(rotatef, 0, 0)
                    index_d[option1] += 1

                elif option1 == "translate":
                    tdx, tdy = options.translate[index_d[option1]].split(",")
                    tdx, tdy = float(tdx), float(tdy)
                    if tdx != 0.0 or tdy != 0.0:
                        d.translate(tdx, tdy, 0.0)
                    index_d[option1] += 1

                elif option1 == "clip":
                    ci = index_d[option1]
                    # Center defaults to the image center unless given explicitly.
                    clipcx = old_div(nx, 2)
                    clipcy = old_div(ny, 2)
                    try:
                        clipx, clipy, clipcx, clipcy = options.clip[ci].split(",")
                    except:
                        clipx, clipy = options.clip[ci].split(",")
                    clipx, clipy = int(clipx), int(clipy)
                    clipcx, clipcy = int(clipcx), int(clipcy)
                    e = d.get_clip(Region(clipcx - old_div(clipx, 2), clipcy - old_div(clipy, 2), clipx, clipy))
                    try:
                        e.set_attr("avgnimg", d.get_attr("avgnimg"))
                    except:
                        pass
                    d = e
                    index_d[option1] += 1

                elif option1 == "randomize":
                    ci = index_d[option1]
                    rnd = options.randomize[ci].split(",")
                    rnd[0] = float(rnd[0])  # +- rotation range (degrees)
                    rnd[1] = float(rnd[1])  # +- translation range (pixels)
                    rnd[2] = int(rnd[2])    # 1 -> allow random mirroring
                    t = Transform()
                    t.set_params({
                        "type": "2d",
                        "alpha": random.uniform(-rnd[0], rnd[0]),
                        "mirror": random.randint(0, rnd[2]),
                        "tx": random.uniform(-rnd[1], rnd[1]),
                        "ty": random.uniform(-rnd[1], rnd[1])
                    })
                    d.transform(t)

                elif option1 == "medianshrink":
                    shrink_f = options.medianshrink[index_d[option1]]
                    if shrink_f > 1:
                        d.process_inplace("math.medianshrink", {"n": shrink_f})
                    index_d[option1] += 1

                elif option1 == "meanshrink":
                    mshrink = options.meanshrink[index_d[option1]]
                    if mshrink > 1:
                        d.process_inplace("math.meanshrink", {"n": mshrink})
                    index_d[option1] += 1

                elif option1 == "selfcl":
                    scl = old_div(options.selfcl[0], 2)
                    sclmd = options.selfcl[1]
                    sc = EMData()
                    if sclmd == 0:
                        # BUGFIX: was the undefined name "true" (NameError)
                        sc.common_lines_real(d, d, scl, True)
                    else:
                        e = d.copy()
                        e.process_inplace("xform.phaseorigin")
                        if sclmd == 1:
                            sc.common_lines(e, e, sclmd, scl, True)
                            sc.process_inplace("math.linear", Dict("shift", EMObject(-90.0), "scale", EMObject(-1.0)))
                        elif sclmd == 2:
                            sc.common_lines(e, e, sclmd, scl, True)
                        else:
                            # BUGFIX: sclmd is an int; concatenating it to a
                            # str raised TypeError instead of this message.
                            print("Error: invalid common-line mode '" + str(sclmd) + "'")
                            sys.exit(1)

                elif option1 == "radon":
                    r = d.do_radon()
                    d = r

            d.write_image(outfile, i)

    E2end(logid)
def main():
    """e2bdb.py entry point: listing, dumping, consistency checking, merging,
    deletion and virtual-stack creation/restoration for EMAN2 BDB databases.

    Dispatches on the parsed options; most modes iterate over every path
    given on the command line, expanding each into a list of dictionary
    names (``dbs``) before acting on them.
    """
    global debug
    progname = os.path.basename(sys.argv[0])
    usage = """prog [options] <path or db> ... Various utilities related to BDB databases. examples : e2bdb.py -c Is perhaps the most critical function, as it cleans up the database cache. See the Wiki for more. e2bdb.py <path> -s will list the contents of the database in a directory in bdb: notation e2bdb.py <path> -l Will give useful summary info about stacks in a directory e2bdb.py <database> --dump Gives a mechanism to dump all of the metadata in a database, even if the database contains no images """
    parser = EMArgumentParser(usage=usage, version=EMANVERSION)
    parser.add_argument("--cleanup", "-c", action="store_true", default=False, help="This option will clean up the database cache so files can safely be moved or accessed on another computer via NFS.")
    parser.add_argument("--force", "-F", action="store_true", default=False, help="This will force an action that would normally fail due to failed checks.")
    parser.add_argument("--delete", action="store_true", default=False, help="This will delete (or at least empty) the named database(s)")
    parser.add_argument("--all", "-a", action="store_true", help="List per-particle info", default=False)
    parser.add_argument("--long", "-l", action="store_true", help="Long listing", default=False)
    parser.add_argument("--short", "-s", action="store_true", help="Dense listing of names only", default=False)
    parser.add_argument("--filt", type=str, help="Only include dictionary names containing the specified string", default=None)
    parser.add_argument("--filtexclude", type=str, help="Exclude dictionary names containing the specified string", default=None)
    parser.add_argument("--match", type=str, help="Only include dictionaries matching the provided Python regular expression", default=None)
    parser.add_argument("--exclude", type=str, help="The name of a database containing a list of exclusion keys", default=None)
    parser.add_argument("--dump", "-D", action="store_true", help="List contents of an entire database, eg 'e2bdb.py -D refine_01#register", default=False)
    parser.add_argument("--smalldump", action="store_true", help="Lists contents of an entire database, but only list 2 items per dictionary to better see headers", default=False)
    parser.add_argument("--extractplots", action="store_true", help="If a database contains sets of plots, such as bdb:refine_xx#convergence.results, this will extract the plots as text files.")
    parser.add_argument("--check", action="store_true", help="Check for self-consistency and errors in the structure of specified databases", default=False)
    parser.add_argument("--nocache", action="store_true", help="Don't use the database cache for this operation", default=False)
    parser.add_argument("--merge", action="store_true", help="This will merge the contents of BDB 2-N into BDB 1 (including BDB 1's contents)", default=False)
    parser.add_argument("--makevstack", type=str, help="Creates a 'virtual' BDB stack with its own metadata, but the binary data taken from the (filtered) list of stacks", default=None)
    parser.add_argument("--appendvstack", type=str, help="Appends to/creates a 'virtual' BDB stack with its own metadata, but the binary data taken from the (filtered) list of stacks", default=None)
    parser.add_argument("--verbose", "-v", dest="verbose", action="store", metavar="n", type=int, default=0, help="verbose level [0-9], higher number means higher level of verboseness")
    parser.add_argument("--list", type=str, help="Specify the name of a file with a list of images to use in creation of virtual stacks. Please see source for details.", default=None)
    parser.add_argument("--exlist", type=str, help="Specify the name of a file with a list of images to exclude in creation of virtual stacks. Please see source for details.", default=None)
    parser.add_argument("--restore", type=str, help="Write changes in the derived virtual stack back to the original stack", default=None)
    parser.add_argument("--ppid", type=int, help="Set the PID of the parent process, used for cross platform PPID", default=-1)
    parser.add_argument("--checkctf", action="store_true", help="Verfies that all images in the file contain CTF information, and gives some basic statistics", default=False)
    parser.add_argument("--step", type=str, default="0,1", help="Specify <init>,<step>[,<max>]. Processes only a subset of the input data. For example, 0,2 would process only the even numbered particles")
    (options, args) = parser.parse_args()

    if options.nocache:
        EMAN2db.BDB_CACHE_DISABLE = True

    if options.cleanup:
        db_cleanup(options.force)
        sys.exit(0)

    # Parse --step into a 3-tuple (init, step, max) or a 2-tuple (init, step);
    # later code distinguishes the two cases by len(options.step).
    try:
        options.step = int(options.step.split(",")[0]), int(options.step.split(",")[1]), int(options.step.split(",")[2])  # convert strings to tuple
    except:
        try:
            options.step = int(options.step.split(",")[0]), int(options.step.split(",")[1])
        except:
            print("Invalid --step specification")
            sys.exit(1)

    if options.all:
        options.long = 1  # --all implies the long listing
    if len(args) == 0:
        args.append("bdb:.")  # default to the current directory

    logid = 0
    # Open (or create) the virtual-stack output database if requested;
    # vstackn is the next output index (0 for a new stack, len for append).
    if options.makevstack:
        logid = E2init(sys.argv)
        vstack = db_open_dict(options.makevstack)
        vstackn = 0
    elif options.appendvstack:
        logid = E2init(sys.argv)
        vstack = db_open_dict(options.appendvstack)
        vstackn = len(vstack)
    else:
        vstack = None

    if options.merge:
        # Interactive merge: copy every key of databases 2..N into database 1.
        print("WARNING: Merge mode\nCombining contents of: ", ", ".join(args[1:]))
        print("into ", args[0])
        if input("Proceed (y/n) :").lower() != "y":
            print("Aborting")
            sys.exit(1)
        for i, path in enumerate(args):
            if path.lower()[:4] == "bdb:" and not "#" in path:
                path = "bdb:.#" + path[4:]
            if path.lower()[:4] != "bdb:":
                path = "bdb:" + path
            if i == 0:
                outdb = db_open_dict(path)
                continue
            indb = db_open_dict(path, True)
            for k in list(indb.keys()):
                outdb[k] = indb[k]
        print("Merging complete")
        sys.exit(0)

    for path in args:
        # Normalize the argument into "bdb:dir#" form, then derive dbs:
        # either the single dictionary named after '#', or every dictionary
        # found at the path.
        if path.lower()[:4] == "bdb:" and not "#" in path:
            uu = os.path.split(path)
            if (uu[0] == ''):
                path = "bdb:.#" + path[4:]
            else:
                path = uu[0] + "#" + uu[1]
        if path.lower()[:4] != "bdb:":
            path = "bdb:" + path
        if '#' in path:
            if len(args) > 1:
                print("\n", path, ":")
            path, dbs = path.rsplit("#", 1)
            path += "#"
            dbs = [dbs]
        else:
            if not '#' in path and path[-1] != '/':
                path += '#'
            if len(args) > 1:
                print("\n", path[:-1], ":")
            dbs = db_list_dicts(path)

        dbs.sort()
        # Name-based filtering of the dictionary list.
        if options.filt:
            dbs = [db for db in dbs if options.filt in db]
        if options.filtexclude:
            dbs = [db for db in dbs if options.filtexclude not in db]
        if options.match != None:
            dbs = [db for db in dbs if re.match(options.match, db)]

        if options.list:
            # Whitespace-separated image numbers to INCLUDE in the vstack;
            # only the first len(first-line) columns of each line are read.
            if options.makevstack == None and options.appendvstack == None:
                print("ERROR, this option is used for virtual stack creation, please add makevstack or appendvstack options, and restart")
                sys.exit(1)
            vdata = open(options.list, 'r').readlines()
            n = len(vdata[0].split())
            slist = []
            for line in vdata:
                line = line.split()
                for i in range(n):
                    val = int(line[i])
                    slist.append(val)
            del n, val, vdata

        if options.exlist:
            # Image numbers to EXCLUDE: slist becomes the complement within
            # the image count of the first argument.
            if options.makevstack == None:
                print("ERROR, this option is used for virtual stack creation, please add makevstack or appendvstack options, and restart")
                sys.exit(1)
            vdata = open(options.exlist, 'r').readlines()
            n = len(vdata[0].split())
            slist = []
            for line in vdata:
                line = line.split()
                for i in range(n):
                    val = int(line[i])
                    slist.append(val)
            n = EMUtil.get_image_count(args[0])
            good = set(range(n)) - set(slist)
            slist = [i for i in good]
            slist.sort()
            del n, val, vdata, good

        if options.makevstack != None or options.appendvstack != None:
            # Build the virtual stack: copy each selected header, pointing
            # its data_path at the original binary data via a relative path.
            vspath = os.path.realpath(vstack.path) + "/"
            if options.verbose > 2:
                print("vspath: ", vspath)
            for db in dbs:
                dct, keys = db_open_dict(path + db, ro=True, with_keys=True)
                if dct == vstack:
                    continue  # never copy the output stack into itself
                if len(options.step) == 2:
                    if keys == None:
                        vals = list(range(options.step[0], len(dct), options.step[1]))
                    else:
                        vals = keys[options.step[0]::options.step[1]]  # we apply --step even if we have a list of keys
                else:
                    if keys == None:
                        vals = list(range(options.step[0], options.step[2], options.step[1]))
                    else:
                        vals = keys[options.step[0]:options.step[2]:options.step[1]]  # we apply --step even if we have a list of keys
                if options.list != None or options.exlist != None:
                    vals = slist
                for n in vals:
                    try:
                        d = dct.get(n, nodata=1).get_attr_dict()
                    except:
                        traceback.print_exc()
                        print("---\nerror reading ", db, n)
                        continue
                    # This block converts an absolute path to the actual data to a relative path
                    try:
                        dpath = os.path.realpath(dct.get_data_path(n))
                        if options.verbose > 2:
                            print("dpath: ", dpath)
                        if os.name == 'nt':
                            vspath = vspath.replace("\\", '/')
                            dpath = dpath.replace('\\', '/')
                        rpath = makerelpath(vspath, dpath)
                        if options.verbose > 2:
                            print("rpath: ", rpath)
                    except:
                        print("error with data_path ", db, n)
                        continue
                    d["data_path"] = rpath
                    d["data_n"] = n
                    d["data_source"] = path + db
                    if d["data_path"] == None:
                        print("error with data_path ", db, n)
                        continue
                    vstack[vstackn] = d
                    vstackn += 1
                    if vstackn % 100 == 0:
                        try:
                            print("\r ", vstackn, " ", end=' ')
                            sys.stdout.flush()
                        except:
                            pass
                print("\r ", vstackn, " ")
                dct.close()

        # Longest dictionary name, used for column widths below. NOTE(review):
        # if dbs is empty this leaves maxname unset and later branches that
        # use it would raise; presumably dbs is non-empty in practice — verify.
        try:
            maxname = max([len(s) for s in dbs])
        except:
            print("Error reading ", path)

        if options.restore:
            # Copy headers from a virtual stack back into the original
            # source databases, keeping the target's data_path bookkeeping.
            nima = EMUtil.get_image_count(options.restore)
            IB = db_open_dict(options.restore)
            source_old = None
            if len(options.step) == 3:
                nima = min(options.step[2], nima)
            for i in range(options.step[0], nima, options.step[1]):
                source = IB.get_header(i)
                source_path = source["source_path"]
                ID = source["source_n"]
                if (source_old != source_path):
                    # Source database changed: close the previous one and
                    # open the new one (read-only).
                    if (source_old != None):
                        DB.close()
                    DB = db_open_dict(source_path, ro=True)
                    source_old = source_path
                target = DB.get_header(ID)
                try:
                    source["data_path"] = target["data_path"]
                    source["data_n"] = target["data_n"]
                    source["source_path"] = target["source_path"]
                    source["source_n"] = target["source_n"]
                except:
                    # top level does not have data_path
                    del source['data_path']
                    del source['data_n']
                    source["source_path"] = target["source_path"]
                    source["source_n"] = target["source_n"]
                DB.set_header(ID, source)
            DB.close()

        if options.extractplots:
            # Write any list-of-lists values (plots) out as tab-separated
            # text files, one file per dictionary key.
            for db in dbs:
                print("#### Extracting plots from ", db)
                dct = db_open_dict(path + db, ro=True)
                #### Dump
                keys = list(dct.keys())
                keys.sort()
                for k in keys:
                    v = dct[k]
                    try:
                        ns = [len(i) for i in v]
                        fsp = db + "-" + k + ".txt"
                        print("%s (%d columns)" % (fsp, len(ns)))
                        out = open(fsp, "w")
                        for i in range(ns[0]):
                            for j in range(len(ns)):
                                out.write(str(v[j][i]))
                                if j < len(ns) - 1:
                                    out.write("\t")
                            out.write("\n")
                        out.close()
                    except:
                        continue  # value was not a set of plots; skip it
                dct.close()

        if options.smalldump:
            # Like --dump but shows at most 2 entries per dict value.
            for db in dbs:
                print("##### ", db)
                dct = db_open_dict(path + db, ro=True)
                #### Dump
                keys = list(dct.keys())
                keys.sort()
                if len(options.step) == 3:
                    keys = keys[:options.step[2]]
                for k in keys[options.step[0]::options.step[1]]:
                    v = dct[k]
                    print("%s : " % k, end=' ')
                    if isinstance(v, list) or isinstance(v, tuple):
                        for i in v:
                            print("\n\t%s" % str(i), end=' ')
                        print("")
                    elif isinstance(v, dict):
                        ks2 = list(v.keys())
                        ks2.sort()
                        kc = 0
                        for i in ks2:
                            if kc >= 2:
                                print("...")
                                break
                            print("\n\t%s : %s" % (i, v[i]), end=' ')
                            kc += 1
                        print("")
                    else:
                        print(str(v))
                dct.close()

        if options.checkctf:
            # Report which images lack CTF info and summarize defocus values.
            for db in dbs:
                print("##### CTF -> ", db)
                dct = db_open_dict(path + db, ro=True)
                keys = list(dct.keys())
                if len(options.step) == 3:
                    keys = keys[:options.step[2]]
                defocus = set()
                for k in keys[options.step[0]::options.step[1]]:
                    v = dct.get_header(k)
                    try:
                        ctf = v["ctf"]
                    except:
                        if k != "maxrec":
                            print("CTF missing on image %s" % k)
                        continue
                    defocus.add(ctf.defocus)
                defocus = list(defocus)
                print("Defocuses found: ", end=' ')
                for i in defocus:
                    print("%1.3f, " % i, end=' ')
                print("\n\nRange: %1.3f - %1.3f (%d unique values)" % (min(defocus), max(defocus), len(defocus)))

        if options.dump:
            # Full metadata dump of every selected key.
            for db in dbs:
                print("##### ", db)
                dct = db_open_dict(path + db, ro=True)
                #### Dump
                keys = list(dct.keys())
                if len(options.step) == 3:
                    keys = keys[:options.step[2]]
                keys.sort()
                for k in keys[options.step[0]::options.step[1]]:
                    v = dct[k]
                    print("%s : " % k, end=' ')
                    if isinstance(v, list) or isinstance(v, tuple):
                        for i in v:
                            print("\n\t%s" % str(i), end=' ')
                        print("")
                    elif isinstance(v, dict):
                        ks2 = list(v.keys())
                        ks2.sort()
                        for i in ks2:
                            print("\n\t%s : %s" % (i, v[i]), end=' ')
                        print("")
                    else:
                        print(str(v))
                dct.close()
        # long listing, one db per line
        elif options.long:
            width = maxname + 3
            fmt = "%%-%ds %%-07d %%14s %%s" % width
            fmt2 = "%%-%ds (not an image stack)" % width
            total = [0, 0]  # [image count, byte size] across all dbs
            for db in dbs:
                dct = db_open_dict(path + db, True)
                ### Info on all particles
                if options.all:
                    mx = len(dct)
                    if len(options.step) == 3:
                        mx = min(mx, options.step[2])
                    for i in range(options.step[0], mx, options.step[1]):
                        try:
                            im = dct[i]
                            if im == None:
                                raise Exception
                        except:
                            continue
                        print("%d. %d x %d x %d\tA/pix=%1.2f\tM=%1.4f\tS=%1.4f\tSk=%1.4f" % (i, im["nx"], im["ny"], im["nz"], im["apix_x"], im["mean"], im["sigma"], im["skewness"]), end=' ')
                        try:
                            print("\t%s" % str(im["model_id"]))
                        except:
                            pass
                        try:
                            print("\tdf=%1.3f\tB=%1.1f" % (im["ctf"].defocus, im["ctf"].bfactor))
                        except:
                            print(" ")
                first = EMData()
                try:
                    # Header-only read of image 0 to get dimensions;
                    # size assumes 4 bytes/pixel.
                    first.read_image(path + db, 0, True)
                    size = first.get_xsize() * first.get_ysize() * first.get_zsize() * len(dct) * 4
                    total[0] += len(dct)
                    total[1] += size
                    print(fmt % (db, len(dct), "%dx%dx%d apix: %1.2f" % (first.get_xsize(), first.get_ysize(), first.get_zsize(), first["apix_x"]), human_size(size)), end=' ')
                except:
                    print(fmt2 % db)
                try:
                    print("\tdf: %1.3f\tB: %1.0f" % (first["ctf"].defocus, first["ctf"].bfactor))
                except:
                    print("")
                dct.close()
            print(fmt % ("TOTAL", total[0], "", human_size(total[1])))
        elif options.check:
            # Structural self-consistency check on the raw BDB key/value
            # pairs. NOTE(review): this looks like unconverted Python-2 code —
            # pickle.loads() here is given str, and BDB keys are presumably
            # bytes under Python 3; verify before relying on this branch.
            from pickle import loads
            for db in dbs:
                dct = db_open_dict(path + db, ro=True)
                dct.realopen()
                keys = list(dct.bdb.keys())
                allkvp = {}
                for k in keys:
                    s1, s2 = k.split("\x80", 1)  # start of a pickled string.
                    s2 = loads("\x80" + s2)  # the pickled part
                    if len(s1) > 0:  # If anything unpickled, then it is an axbxc prefix identifying the location of a binary
                        st = allkvp.setdefault(s1, set())  # set of all positions seen so far
                        v = loads(dct.bdb.get(k))  # position in binary file
                        if v in st:
                            print("Error: value %d seen multiple times in %s (%s,%s)" % (v, db, s1, s2))
                        st.add(v)
                print("%s : " % db, end=' ')
                for i in list(allkvp.keys()):
                    if options.verbose > 0:
                        print("%s %d/%d\t" % (i, len(allkvp[i]), int(max(allkvp[i])) + 1), end=' ')
                    if len(allkvp[i]) != int(max(allkvp[i]) + 1):
                        print("\nMismatch found in %s. Could be normal if file has been rewritten multiple times, but is unusual" % db)
                if options.verbose > 0:
                    print("")
                else:
                    print(" done")
                dct.close()
        elif options.short:
            for db in dbs:
                print(path + db, end=' ')
            print(" ")
        elif not options.makevstack and not options.appendvstack:
            # Nicely formatted 'ls' style display
            cols = int(floor(old_div(80.0, (maxname + 3))))
            width = old_div(80, cols)
            rows = int(ceil(old_div(float(len(dbs)), cols)))
            fmt = "%%-%ds" % width
            for r in range(rows):
                for c in range(cols):
                    try:
                        print(fmt % dbs[r + c * rows], end=' ')
                    except:
                        pass  # last column may be ragged
                print(" ")

        if options.delete:
            # Confirmation prompt is skipped with --force.
            if not options.force:
                print("You are requesting to delete the following databases:")
                for db in dbs:
                    print(db, " ", end=' ')
                if input("\nAre you sure (y/n) ? ")[0].lower() != 'y':
                    print("Aborted")
                    sys.exit(1)
            for db in dbs:
                db_remove_dict(path + db)

    if logid:
        E2end(logid)
def main():
    # Python 2 variant of the e2procmulti entry point (print statements,
    # xrange, integer '/'); duplicated elsewhere in this file in converted
    # Python 3 form. Applies the requested sequence of 2-D operations to
    # every image of every input stack, in place unless --postfix is given.
    progname = os.path.basename(sys.argv[0])
    usage = progname + """ [options] <inputfiles> A generic 2D image processing program which acts on multiple input stacks. Default operation is similar to e2proc2d.py --inplace, but operating on many files. """
    parser = EMArgumentParser(usage=usage,version=EMANVERSION)
    parser.add_argument("--postfix", type=str, help="Adds this string to each input filename to produce output filename (avoid overwriting)",default=None)
    parser.add_argument("--allparticles",action="store_true",help="Will process all particle sets stored in BDB in the particles subdirectory",default=False)
    parser.add_argument("--apix", type=float, help="A/pixel for S scaling")
    parser.add_argument("--clip", metavar="xsize,ysize", type=str, action="append", help="Specify the output size in pixels xsize,ysize[,xcenter,ycenter], images can be made larger or smaller.")
    parser.add_argument("--process", metavar="processor_name:param1=value1:param2=value2", type=str, action="append", help="apply a processor named 'processorname' with all its parameters/values.")
    parser.add_argument("--autoinvert", action="store_true",help="Automatically decides whether to invert each stack of images to make particles white (EMAN2 convention). Decision is made for an entire stack. Non-inverted images will NOT BET PROCESSED AT ALL !",default=False)
    parser.add_argument("--mult", metavar="k", type=float, help="Multiply image by a constant. mult=-1 to invert contrast.")
    parser.add_argument("--meanshrink", metavar="n", type=int, action="append", help="Reduce an image size by an integral scaling factor using average. Clip is not required.")
    parser.add_argument("--medianshrink", metavar="n", type=int, action="append", help="Reduce an image size by an integral scaling factor, uses median filter. Clip is not required.")
    parser.add_argument("--multfile", type=str, action="append", help="Multiplies the volume by another volume of identical size. This can be used to apply masks, etc.")
    parser.add_argument("--norefs", action="store_true", help="Skip any input images which are marked as references (usually used with classes.*)")
    parser.add_argument("--radon", action="store_true", help="Do Radon transform")
    parser.add_argument("--randomize", type=str, action="append",help="Randomly rotate/translate the image. Specify: da,dxy,flip da is a uniform distribution over +-da degrees, dxy is a uniform distribution on x/y, if flip is 1, random handedness changes will occur")
    parser.add_argument("--rotate", type=float, action="append", help="Rotate clockwise (in degrees)")
    parser.add_argument("--fp", type=int, help="This generates rotational/translational 'footprints' for each input particle, the number indicates which algorithm to use (0-6)")
    parser.add_argument("--scale", metavar="f", type=float, action="append", help="Scale by specified scaling factor. Clip must also be specified to change the dimensions of the output map.")
    parser.add_argument("--selfcl", metavar="steps mode", type=int, nargs=2, help="Output file will be a 180x180 self-common lines map for each image.")
    parser.add_argument("--translate", type=str, action="append", help="Translate by x,y pixels")
    parser.add_argument("--verbose", "-v", dest="verbose", action="store", metavar="n", type=int, help="verbose level [0-9], higner number means higher level of verboseness",default=0)
    parser.add_argument("--ppid", type=int, help="Set the PID of the parent process, used for cross platform PPID",default=-1)
    parser.add_argument("--writejunk", action="store_true", help="Writes the image even if its sigma is 0.", default=False)

    # Options that may be supplied several times; index_d below tracks how
    # many occurrences of each have been consumed for the current image.
    append_options = ["clip", "process", "meanshrink", "medianshrink", "scale", "randomize", "rotate", "translate", "multfile"]
    # Operations are applied in command-line order.
    optionlist = pyemtbx.options.get_optionlist(sys.argv[1:])
    (options, args) = parser.parse_args()

    if options.allparticles:
        args=["bdb:particles#"+i for i in db_list_dicts("bdb:particles")]
        args.sort()
        if options.verbose :
            print "%d particle files identified"%len(args)

    if len(args)<1 :
        print "Please specify at least one input file to operate on"
        sys.exit(1)

    logid=E2init(sys.argv,options.ppid)

    for infile in args:
        # Derive the output name; without --postfix, files are modified in place.
        if options.postfix!=None :
            if infile[:4].lower()=="bdb:" :
                outfile=infile+options.postfix
            elif "." in infile :
                outfile=infile.rsplit(".",1)[0]+options.postfix+"."+infile.rsplit(".",1)[1]
            else :
                outfile=infile+options.postfix
        else :
            outfile=infile

        if options.verbose>2:
            Log.logger().set_level(options.verbose-2)

        d = EMData()
        nimg = EMUtil.get_image_count(infile)
        ld = EMData()  # NOTE(review): unused

        if options.autoinvert:
            # Compare mean inside a central disk vs the surrounding annulus
            # over the whole stack; particles should be brighter (white).
            sumin,sumout=0,0
            suminsig=0
            for i in xrange(nimg):
                d.read_image(infile, i)
                d1=d.process("mask.sharp",{"outer_radius":d["nx"]*2/7})
                d2=d.process("mask.sharp",{"inner_radius":d["nx"]*2/7,"outer_radius":d["nx"]/2-2})
                sumin+=d1["mean"]
                sumout+=d2["mean"]
                suminsig+=d1["sigma"]
            doinvert=sumin<sumout
            # NOTE(review): the skip decision is gated on options.verbose,
            # so at verbose=0 every stack takes the else and is skipped —
            # looks like a bug; verify intended behavior.
            if options.verbose and doinvert :
                print "Inverting ",infile
            else :
                continue
        # if (sumin<sumout and fabs(sumin-sumout)/suminsig>.01) :print infile,sumin,sumout,suminsig,sumin>sumout
        # continue

        if options.verbose:
            print "%s : processing %d images"%(infile,nimg)

        lasttime=time.time()
        for i in xrange(nimg):
            if options.verbose >= 1:
                # Throttled progress display (at most every 3 s unless very verbose).
                if time.time()-lasttime>3 or options.verbose>2 :
                    sys.stdout.write(" %7d\r" %i)
                    sys.stdout.flush()
                    lasttime=time.time()

            d.read_image(infile, i)
            sigma = d.get_attr("sigma").__float__()
            if sigma == 0:
                print "Warning: sigma = 0 for image ",i
                if options.writejunk == False:
                    print "Use the writejunk option to force writing this image to disk"
                    continue

            index_d = {}
            for append_option in append_options:
                index_d[append_option] = 0

            for option1 in optionlist:
                nx = d.get_xsize()
                ny = d.get_ysize()

                if option1 == "apix":
                    apix = options.apix
                    d.set_attr('apix_x', apix)
                    d.set_attr('apix_y', apix)
                    d.set_attr('apix_z', apix)
                    try:
                        # NOTE(review): "n0" is undefined here; the NameError
                        # is swallowed by the bare except, so the CTF apix is
                        # never actually updated — verify against e2proc2d.
                        if i==n0 and d["ctf"].apix!=apix :
                            print "Warning: A/pix value in CTF was %1.2f, changing to %1.2f. May impact CTF parameters."%(d["ctf"].apix,apix)
                        d["ctf"].apix=apix
                    except: pass

                if option1 == "process":
                    fi = index_d[option1]
                    (processorname, param_dict) = parsemodopt(options.process[fi])
                    if not param_dict : param_dict={}
                    d.process_inplace(processorname, param_dict)
                    index_d[option1] += 1

                elif option1 == "mult" :
                    d.mult(options.mult)

                elif option1 == "autoinvert" and doinvert:
                    d.mult(-1.0)
                    d["autoinvert"]=True

                elif option1 == "multfile":
                    mf=EMData(options.multfile[index_d[option1]],0)
                    d.mult(mf)
                    mf=None
                    index_d[option1] += 1

                elif option1 == "norefs" and d["ptcl_repr"] <= 0:
                    continue

                elif option1 == "setsfpairs":
                    # Match the radial power spectrum of each odd image to
                    # that of the preceding even image.
                    dataf = d.do_fft()
                    # d.gimme_fft()
                    x0 = 0
                    step = 0.5
                    if i%2 == 0:
                        sfcurve1 = dataf.calc_radial_dist(nx, x0, step)
                    else:
                        sfcurve2 = dataf.calc_radial_dist(nx, x0, step)
                        for j in range(nx):
                            if sfcurve1[j] > 0 and sfcurve2[j] > 0:
                                sfcurve2[j] = sqrt(sfcurve1[j] / sfcurve2[j])
                            else:
                                sfcurve2[j] = 0;
                        dataf.apply_radial_func(x0, step, sfcurve2);
                        d = dataf.do_ift();
                        # dataf.gimme_fft();

                elif option1 == "fp":
                    d = d.make_footprint(options.fp)

                elif option1 == "scale":
                    scale_f = options.scale[index_d[option1]]
                    if scale_f != 1.0:
                        d.scale(scale_f)
                    index_d[option1] += 1

                elif option1 == "rotate":
                    rotatef = options.rotate[index_d[option1]]
                    if rotatef!=0.0 : d.rotate(rotatef,0,0)
                    index_d[option1] += 1

                elif option1 == "translate":
                    tdx,tdy=options.translate[index_d[option1]].split(",")
                    tdx,tdy=float(tdx),float(tdy)
                    if tdx !=0.0 or tdy != 0.0 :
                        d.translate(tdx,tdy,0.0)
                    index_d[option1] += 1

                elif option1 == "clip":
                    ci = index_d[option1]
                    # Center defaults to the image center unless given explicitly.
                    clipcx=nx/2
                    clipcy=ny/2
                    try: clipx,clipy,clipcx,clipcy = options.clip[ci].split(",")
                    except: clipx, clipy = options.clip[ci].split(",")
                    clipx, clipy = int(clipx),int(clipy)
                    clipcx, clipcy = int(clipcx),int(clipcy)
                    e = d.get_clip(Region(clipcx-clipx/2, clipcy-clipy/2, clipx, clipy))
                    try: e.set_attr("avgnimg", d.get_attr("avgnimg"))
                    except: pass
                    d = e
                    index_d[option1] += 1

                elif option1 == "randomize" :
                    ci = index_d[option1]
                    # rnd = [+-rotation degrees, +-translation pixels, allow-mirror flag]
                    rnd = options.randomize[ci].split(",")
                    rnd[0]=float(rnd[0])
                    rnd[1]=float(rnd[1])
                    rnd[2]=int(rnd[2])
                    t=Transform()
                    t.set_params({"type":"2d","alpha":random.uniform(-rnd[0],rnd[0]),"mirror":random.randint(0,rnd[2]),"tx":random.uniform(-rnd[1],rnd[1]),"ty":random.uniform(-rnd[1],rnd[1])})
                    d.transform(t)

                elif option1 == "medianshrink":
                    shrink_f = options.medianshrink[index_d[option1]]
                    if shrink_f > 1:
                        d.process_inplace("math.medianshrink",{"n":shrink_f})
                    index_d[option1] += 1

                elif option1 == "meanshrink":
                    mshrink = options.meanshrink[index_d[option1]]
                    if mshrink > 1:
                        d.process_inplace("math.meanshrink",{"n":mshrink})
                    index_d[option1] += 1

                elif option1 == "selfcl":
                    scl = options.selfcl[0] / 2
                    sclmd = options.selfcl[1]
                    sc = EMData()
                    # NOTE(review): "true" is undefined in Python (NameError
                    # when --selfcl is used); presumably should be True.
                    if sclmd == 0:
                        sc.common_lines_real(d, d, scl, true)
                    else:
                        e = d.copy()
                        e.process_inplace("xform.phaseorigin")
                        if sclmd == 1:
                            sc.common_lines(e, e, sclmd, scl, true)
                            sc.process_inplace("math.linear", Dict("shift", EMObject(-90.0), "scale", EMObject(-1.0)))
                        elif sclmd == 2:
                            sc.common_lines(e, e, sclmd, scl, true)
                        else:
                            # NOTE(review): sclmd is an int; this concatenation
                            # raises TypeError instead of printing the message.
                            print "Error: invalid common-line mode '" + sclmd + "'"
                            sys.exit(1)

                elif option1 == "radon":
                    r = d.do_radon()
                    d = r

            d.write_image(outfile,i)
def main():
	"""Command-line entry point for the BDB database utility (Python 2 / EMAN2).

	Parses options, then runs one or more of: cache cleanup, merging
	databases, building/appending 'virtual' stacks, restoring a virtual
	stack's headers to its source, and several listing/dump/check modes.
	NOTE(review): this function appears to continue beyond the visible
	chunk (no E2end/epilogue here); only the visible portion is shown.
	"""
	global debug
	progname = os.path.basename(sys.argv[0])
	# Usage text (original internal line breaks were lost in extraction).
	usage = """prog [options] <path or db> ... Various utilities related to BDB databases. examples : e2bdb.py -c Is perhaps the most critical function, as it cleans up the database cache. See the Wiki for more. e2bdb.py <path> -s will list the contents of the database in a directory in bdb: notation e2bdb.py <path> -l Will give useful summary info about stacks in a directory e2bdb.py <database> --dump Gives a mechanism to dump all of the metadata in a database, even if the database contains no images """
	parser = EMArgumentParser(usage=usage,version=EMANVERSION)
	# --- option declarations ---
	parser.add_argument("--cleanup","-c",action="store_true",default=False,help="This option will clean up the database cache so files can safely be moved or accessed on another computer via NFS.")
	parser.add_argument("--force","-F",action="store_true",default=False,help="This will force an action that would normally fail due to failed checks.")
	parser.add_argument("--delete",action="store_true",default=False,help="This will delete (or at least empty) the named database(s)")
	parser.add_argument("--all","-a",action="store_true",help="List per-particle info",default=False)
	parser.add_argument("--long","-l",action="store_true",help="Long listing",default=False)
	parser.add_argument("--short","-s",action="store_true",help="Dense listing of names only",default=False)
	parser.add_argument("--filt",type=str,help="Only include dictionary names containing the specified string",default=None)
	parser.add_argument("--filtexclude",type=str,help="Exclude dictionary names containing the specified string",default=None)
	parser.add_argument("--match",type=str,help="Only include dictionaries matching the provided Python regular expression",default=None)
	parser.add_argument("--exclude",type=str,help="The name of a database containing a list of exclusion keys",default=None)
	parser.add_argument("--dump","-D",action="store_true",help="List contents of an entire database, eg 'e2bdb.py -D refine_01#register",default=False)
	parser.add_argument("--smalldump",action="store_true",help="Lists contents of an entire database, but only list 2 items per dictionary to better see headers",default=False)
	parser.add_argument("--extractplots",action="store_true",help="If a database contains sets of plots, such as bdb:refine_xx#convergence.results, this will extract the plots as text files.")
	parser.add_argument("--check",action="store_true",help="Check for self-consistency and errors in the structure of specified databases",default=False)
	parser.add_argument("--nocache",action="store_true",help="Don't use the database cache for this operation",default=False)
	parser.add_argument("--merge",action="store_true",help="This will merge the contents of BDB 2-N into BDB 1 (including BDB 1's contents)",default=False)
	parser.add_argument("--makevstack",type=str,help="Creates a 'virtual' BDB stack with its own metadata, but the binary data taken from the (filtered) list of stacks",default=None)
	parser.add_argument("--appendvstack",type=str,help="Appends to/creates a 'virtual' BDB stack with its own metadata, but the binary data taken from the (filtered) list of stacks",default=None)
	parser.add_argument("--verbose", "-v", dest="verbose", action="store", metavar="n", type=int, default=0, help="verbose level [0-9], higher number means higher level of verboseness")
	parser.add_argument("--list",type=str,help="Specify the name of a file with a list of images to use in creation of virtual stacks. Please see source for details.",default=None)
	parser.add_argument("--exlist",type=str,help="Specify the name of a file with a list of images to exclude in creation of virtual stacks. Please see source for details.",default=None)
	parser.add_argument("--restore",type=str,help="Write changes in the derived virtual stack back to the original stack",default=None)
	parser.add_argument("--ppid", type=int, help="Set the PID of the parent process, used for cross platform PPID",default=-1)
	# NOTE(review): "Verfies" is a typo in the help string; left untouched
	# because help text is runtime-visible program output.
	parser.add_argument("--checkctf",action="store_true",help="Verfies that all images in the file contain CTF information, and gives some basic statistics",default=False)
	parser.add_argument("--step",type=str,default="0,1",help="Specify <init>,<step>[,<max>]. Processes only a subset of the input data. For example, 0,2 would process only the even numbered particles")

	(options, args) = parser.parse_args()

	if options.nocache : EMAN2db.BDB_CACHE_DISABLE=True

	# Cache cleanup is terminal: do it and exit.
	if options.cleanup :
		db_cleanup(options.force)
		sys.exit(0)

	# Parse --step "init,step[,max]" into a 2- or 3-tuple of ints.
	try : options.step=int(options.step.split(",")[0]),int(options.step.split(",")[1]),int(options.step.split(",")[2])	# convert strings to tuple
	except:
		try: options.step=int(options.step.split(",")[0]),int(options.step.split(",")[1])
		except:
			print "Invalid --step specification"
			sys.exit(1)

	if options.all : options.long=1
	if len(args)==0 : args.append("bdb:.")		# default to the bdb databases in the current directory

	logid=0
	# Open the output virtual stack if one was requested; vstackn is the
	# next write index (0 for a new stack, current length when appending).
	if options.makevstack :
		logid=E2init(sys.argv)
		vstack=db_open_dict(options.makevstack)
		vstackn=0
	elif options.appendvstack :
		logid=E2init(sys.argv)
		vstack=db_open_dict(options.appendvstack)
		vstackn=len(vstack)
	else : vstack=None

	# Merge mode: copy all keys from databases 2..N into database 1, then exit.
	if options.merge :
		print "WARNING: Merge mode\nCombining contents of: ",", ".join(args[1:])
		print "into ",args[0]
		if raw_input("Proceed (y/n) :").lower() != "y" :
			print "Aborting"
			sys.exit(1)
		for i,path in enumerate(args):
			# Normalize each argument into full "bdb:dir#name" form.
			if path.lower()[:4]=="bdb:" and not "#" in path : path="bdb:.#"+path[4:]
			if path.lower()[:4]!="bdb:" : path="bdb:"+path
			if i==0 :
				outdb=db_open_dict(path)		# first arg is the merge target
				continue
			indb=db_open_dict(path,True)
			for k in indb.keys():
				outdb[k]=indb[k]
		print "Merging complete"
		sys.exit(0)

	# Main per-path loop: normalize the path, expand/filter the dictionary
	# list, then apply whichever actions were requested.
	for path in args:
		if path.lower()[:4]=="bdb:" and not "#" in path :
			uu = os.path.split(path)
			if(uu[0] == ''): path="bdb:.#"+path[4:]
			else: path=uu[0]+"#"+uu[1]
		if path.lower()[:4]!="bdb:" : path="bdb:"+path
		if '#' in path :
			# A single named dictionary was specified.
			if len(args)>1 : print "\n",path,":"
			path,dbs=path.rsplit("#",1)
			path+="#"
			dbs=[dbs]
		else:
			# A directory was specified: list all dictionaries in it.
			if not '#' in path and path[-1]!='/' : path+='#'
			if len(args)>1 : print "\n",path[:-1],":"
			dbs=db_list_dicts(path)

		dbs.sort()
		# Apply name-based filters in order: substring include, substring
		# exclude, then regular-expression match.
		if options.filt: dbs=[db for db in dbs if options.filt in db]
		if options.filtexclude: dbs=[db for db in dbs if options.filtexclude not in db]
		if options.match!=None: dbs=[db for db in dbs if re.match(options.match,db)]

		# --list: explicit image numbers to INCLUDE in the virtual stack,
		# read as whitespace-separated ints from a text file.
		if options.list :
			if options.makevstack==None and options.appendvstack==None :
				print "ERROR, this option is used for virtual stack creation, please add makevstack or appendvstack options, and restart"
				sys.exit(1)
			vdata=open(options.list,'r').readlines()
			n=len(vdata[0].split())		# assumes every row has the same column count as row 0 — TODO confirm
			slist=[]
			for line in vdata:
				line=line.split()
				for i in xrange(n):
					val=int(line[i])
					slist.append(val)
			# NOTE(review): if the file is empty this raises IndexError above,
			# and 'val' is unbound for the del if no rows parsed.
			del n,val,vdata

		# --exlist: image numbers to EXCLUDE; slist becomes the sorted
		# complement relative to the image count of the first argument.
		if options.exlist :
			if options.makevstack==None:
				print "ERROR, this option is used for virtual stack creation, please add makevstack or appendvstack options, and restart"
				sys.exit(1)
			vdata=open(options.exlist,'r').readlines()
			n=len(vdata[0].split())
			slist=[]
			for line in vdata:
				line=line.split()
				for i in xrange(n):
					val=int(line[i])
					slist.append(val)
			n = EMUtil.get_image_count(args[0])
			good = set(range(n)) - set(slist)
			slist = [i for i in good]
			slist.sort()
			del n,val,vdata,good

		# Virtual stack construction: copy headers (not data) from each
		# source dictionary, rewriting data_path to a path relative to the
		# virtual stack's own directory.
		if options.makevstack!=None or options.appendvstack!=None :
			vspath=os.path.realpath(vstack.path)+"/"
			if options.verbose>2 : print "vspath: ",vspath
			for db in dbs:
				dct,keys=db_open_dict(path+db,ro=True,with_keys=True)
				if dct==vstack : continue		# don't copy the output stack into itself
				if len(options.step)==2 :
					if keys == None: vals = xrange(options.step[0],len(dct),options.step[1])
					else: vals = keys[options.step[0]::options.step[1]]		# we apply --step even if we have a list of keys
				else:
					if keys == None: vals = xrange(options.step[0],options.step[2],options.step[1])
					else: vals = keys[options.step[0]:options.step[2]:options.step[1]]		# we apply --step even if we have a list of keys
				if options.list !=None or options.exlist != None: vals=slist
				for n in vals:
					try: d=dct.get(n,nodata=1).get_attr_dict()		# header only, no image data
					except:
						traceback.print_exc()
						print "---\nerror reading ",db,n
						continue
					# This block converts an absolute path to the actual data to a relative path
					try:
						dpath=os.path.realpath(dct.get_data_path(n))
						if options.verbose>2 : print "dpath: ",dpath
						if os.name == 'nt':
							# normalize Windows separators before computing the relative path
							vspath=vspath.replace("\\", '/')
							dpath=dpath.replace('\\', '/')
						rpath=makerelpath(vspath,dpath)
						if options.verbose>2 : print "rpath: ",rpath
					except:
						print "error with data_path ",db,n
						continue
					d["data_path"]=rpath
					d["data_n"]=n
					d["data_source"]= path+db
					if d["data_path"]==None :
						print "error with data_path ",db,n
						continue
					vstack[vstackn]=d
					vstackn+=1
					if vstackn%100==0:
						# lightweight progress indicator; best-effort only
						try:
							print "\r ",vstackn," ",
							sys.stdout.flush()
						except: pass
				print "\r ",vstackn," "
				dct.close()

		# Longest dictionary name, used for column widths in --long output.
		try: maxname=max([len(s) for s in dbs])
		except:
			print "Error reading ",path

		# --restore: push data_path/data_n/source_* from a virtual stack's
		# headers back into the original source stacks.
		if options.restore :
			nima = EMUtil.get_image_count(options.restore)
			IB = db_open_dict(options.restore)
			source_old = None
			if len(options.step)==3 : nima=min(options.step[2],nima)
			for i in xrange(options.step[0],nima,options.step[1]):
				source = IB.get_header(i)
				source_path = source["source_path"]
				ID = source["source_n"]
				# Reopen the source DB only when it changes between images.
				if( source_old != source_path):
					if( source_old != None): DB.close()
					DB = db_open_dict(source_path,ro=True)
					source_old = source_path
				target = DB.get_header( ID )
				try:
					source["data_path"] = target["data_path"]
					source["data_n"] = target["data_n"]
					source["source_path"] = target["source_path"]
					source["source_n"] = target["source_n"]
				except:
					# top level does not have data_path
					del source['data_path']
					del source['data_n']
					source["source_path"] = target["source_path"]
					source["source_n"] = target["source_n"]
				DB.set_header(ID, source)
			# NOTE(review): DB is unbound here if the loop never ran.
			DB.close()

		# --extractplots: write list-of-columns values out as tab-separated
		# text files, one per (db, key).
		if options.extractplots :
			for db in dbs:
				print "#### Extracting plots from ",db
				dct=db_open_dict(path+db,ro=True)
				#### Dump
				keys=dct.keys()
				keys.sort()
				for k in keys:
					v=dct[k]
					try :
						ns=[len(i) for i in v]		# column lengths; fails (skipped) for non-plot values
						fsp=db+"-"+k+".txt"
						print "%s (%d columns)"%(fsp,len(ns))
						out=file(fsp,"w")
						for i in range(ns[0]):
							for j in range(len(ns)):
								out.write(str(v[j][i]))
								if j<len(ns)-1 : out.write("\t")
							out.write("\n")
						out.close()
					except: continue
				dct.close()

		# --smalldump: like --dump but truncates dict values to 2 items.
		if options.smalldump :
			for db in dbs:
				print "##### ",db
				dct=db_open_dict(path+db,ro=True)
				#### Dump
				keys=dct.keys()
				keys.sort()
				if len(options.step)==3 : keys=keys[:options.step[2]]
				for k in keys[options.step[0]::options.step[1]]:
					v=dct[k]
					print "%s : "%k,
					if isinstance (v,list) or isinstance(v,tuple) :
						for i in v: print "\n\t%s"%str(i),
						print ""
					elif isinstance(v,dict) :
						ks2=v.keys()
						ks2.sort()
						kc=0
						for i in ks2:
							if kc>=2 :
								print "..."
								break
							print "\n\t%s : %s"%(i,v[i]),
							kc+=1
						print ""
					else : print str(v)
				dct.close()

		# --checkctf: report images missing CTF info and the set of
		# unique defocus values found.
		if options.checkctf:
			for db in dbs:
				print "##### CTF -> ",db
				dct=db_open_dict(path+db,ro=True)
				keys=dct.keys()
				if len(options.step)==3 : keys=keys[:options.step[2]]
				defocus=set()
				for k in keys[options.step[0]::options.step[1]]:
					v=dct.get_header(k)
					try: ctf=v["ctf"]
					except:
						if k!="maxrec" : print "CTF missing on image %s"%k		# maxrec is a bookkeeping key, not an image
						continue
					defocus.add(ctf.defocus)
				defocus=list(defocus)
				print "Defocuses found: ",
				for i in defocus: print "%1.3f, "%i,
				# NOTE(review): min()/max() raise here if no image had CTF info.
				print "\n\nRange: %1.3f - %1.3f (%d unique values)"%(min(defocus),max(defocus),len(defocus))

		# --dump: print every key/value in full.
		if options.dump :
			for db in dbs:
				print "##### ",db
				dct=db_open_dict(path+db,ro=True)
				#### Dump
				keys=dct.keys()
				if len(options.step)==3 : keys=keys[:options.step[2]]
				keys.sort()
				for k in keys[options.step[0]::options.step[1]]:
					v=dct[k]
					print "%s : "%k,
					if isinstance (v,list) or isinstance(v,tuple) :
						for i in v: print "\n\t%s"%str(i),
						print ""
					elif isinstance(v,dict) :
						ks2=v.keys()
						ks2.sort()
						for i in ks2:
							print "\n\t%s : %s"%(i,v[i]),
						print ""
					else : print str(v)
				dct.close()
		# long listing, one db per line
		elif options.long :
			# NOTE(review): maxname may be unbound if the earlier try failed.
			width=maxname+3
			fmt="%%-%ds %%-07d %%14s %%s"%width
			fmt2="%%-%ds (not an image stack)"%width
			total=[0,0]		# [image count, byte size] accumulators
			for db in dbs:
				dct=db_open_dict(path+db,True)
				### Info on all particles
				if options.all :
					mx=len(dct)
					if len(options.step)==3 : mx=min(mx,options.step[2])
					for i in range(options.step[0],mx,options.step[1]):
						try:
							im=dct[i]
							if im==None : raise Exception
						except: continue
						print "%d. %d x %d x %d\tA/pix=%1.2f\tM=%1.4f\tS=%1.4f\tSk=%1.4f"%(i,im["nx"],im["ny"],im["nz"],im["apix_x"],im["mean"],im["sigma"],im["skewness"]),
						try: print "\t%s"%str(im["model_id"])
						except: pass
						try:
							print "\tdf=%1.3f\tB=%1.1f"%(im["ctf"].defocus,im["ctf"].bfactor)
						except: print " "
				first=EMData()
				try:
					first.read_image(path+db,0,True)		# header only (3rd arg True)
					size=first.get_xsize()*first.get_ysize()*first.get_zsize()*len(dct)*4;		# 4 bytes/pixel assumed
					total[0]+=len(dct)
					total[1]+=size
					print fmt%(db,len(dct),"%dx%dx%d apix: %1.2f"%(first.get_xsize(),first.get_ysize(),first.get_zsize(),first["apix_x"]),human_size(size)),
				except:
					print fmt2%db
				try: print "\tdf: %1.3f\tB: %1.0f"%(first["ctf"].defocus,first["ctf"].bfactor)
				except: print ""
				dct.close()
			print fmt%("TOTAL",total[0],"",human_size(total[1]))
		# --check: verify the low-level key/value structure of each BDB.
		elif options.check :
			from cPickle import loads
			for db in dbs:
				dct=db_open_dict(path+db,ro=True)
				dct.realopen()
				keys=dct.bdb.keys()
				allkvp={}
				for k in keys:
					s1,s2=k.split("\x80",1)		# start of a pickled string.
					s2=loads("\x80"+s2)		# the pickled part
					if len(s1)>0 :		# If anything unpickled, then it is an axbxc prefix identifying the location of a binary
						st=allkvp.setdefault(s1,set())		# set of all positions seen so far
						v=loads(dct.bdb.get(k))		# position in binary file
						if v in st : print "Error: value %d seen multiple times in %s (%s,%s)"%(v,db,s1,s2)
						st.add(v)
				print "%s : "%db,
				for i in allkvp.keys():
					if options.verbose>0 : print "%s %d/%d\t"%(i,len(allkvp[i]),int(max(allkvp[i]))+1),
					# positions should form a gap-free 0..max range
					if len(allkvp[i])!=int(max(allkvp[i])+1) : print "\nMismatch found in %s. Could be normal if file has been rewritten multiple times, but is unusual"%db
				if options.verbose>0 : print ""
				else : print " done"
				dct.close()
		# --short: names only, on one line.
		elif options.short :
			for db in dbs: print path+db,
			print " "