def ensureSIFTFeatures(filepath, paramsSIFT, properties, csvDir, validateByFileExists=False):
  """
  Load serialized SIFT features for an image, or extract and serialize them de novo.

  First checks whether a serialized feature file exists for the image and whether
  the stored Params match paramsSIFT. Otherwise extracts the features and stores
  them serialized (with the Params instance appended as the last list element,
  for future validation).

  filepath: path to the image from which SIFT features have been or have to be extracted.
  paramsSIFT: an mpicbg FloatArray2DSIFT.Param instance describing the extraction parameters.
  properties: unused in this function body — presumably kept for signature
              compatibility with sibling ensure* functions; TODO confirm.
  csvDir: directory into which serialized features have been or will be saved.
  validateByFileExists: when True, merely check that the serialized .obj file
                        exists (quick validation) and return True if so.

  Returns the ArrayList of Feature instances (without the trailing Params element),
  or True when validateByFileExists short-circuits, or None if extraction failed.
  """
  path = os.path.join(csvDir, os.path.basename(filepath) + ".SIFT-features.obj")
  if validateByFileExists:
    if os.path.exists(path):
      return True
  # An ArrayList whose last element is a mpicbg.imagefeatures.FloatArray2DSIFT.Param
  # and all other elements are mpicbg.imagefeatures.Feature
  features = deserialize(path) if os.path.exists(path) else None
  if features:
    if features.get(features.size() - 1).equals(paramsSIFT):
      features.remove(features.size() - 1)  # removes the Params
      syncPrintQ("Loaded %i SIFT features for %s" % (features.size(), os.path.basename(filepath)))
      return features
    else:
      # Remove the file: paramsSIFT have changed
      os.remove(path)
  # Else, extract de novo:
  try:
    # Extract features
    imp = loadImp(filepath)
    ip = imp.getProcessor()
    # Clone so the appended/serialized Params is independent of the caller's instance
    paramsSIFT = paramsSIFT.clone()
    ijSIFT = SIFT(FloatArray2DSIFT(paramsSIFT))
    features = ArrayList()  # of Feature instances
    ijSIFT.extractFeatures(ip, features)
    # Release image resources promptly (Jython/ImageJ memory management)
    ip = None
    imp.flush()
    imp = None
    features.add( paramsSIFT )  # append Params instance at the end for future validation
    serialize(features, path)
    features.remove(features.size() - 1)  # to return without the Params for immediate use
    syncPrintQ("Extracted %i SIFT features for %s" % (features.size(), os.path.basename(filepath)))
  except:
    # NOTE(review): bare except matches the file's printException() pattern;
    # on failure, features may be None (no file) or a stale deserialized list.
    printException()
  return features
def log_exc(exc, url, wb):
    """Serialize the exception object to the log dir and append a textual
    report (traceback, offending url, referring urls, dump filename) to the
    error log."""
    dump_name = io.safe_filename("exc", dir=io.LOGDIR)
    io.serialize(exc, dump_name, dir=io.LOGDIR)
    report = traceback.format_exc()
    report += "\nBad url: |%s|\n" % url
    for referrer in wb.get(url).incoming.keys():
        report += "Ref : |%s|\n" % referrer
    report += "Exception object serialized to file: %s\n\n" % dump_name
    io.savelog(report, "error_log", "a")
def save_session(wb, queue=None):
    """
    Persist the spider state to disk under io.LOGDIR.

    The web graph is always written to <hostname>.web. If a work queue is
    given (spidering still in progress), it is written to <hostname>.session;
    otherwise any stale <hostname>.session file is deleted, since a missing
    session file marks the crawl as complete.

    wb: the Web instance; its root url determines the output filename.
    queue: optional pending-work queue; falsy means spidering is complete.
    """
    # NOTE: this function appeared twice, byte-for-byte identical, in the
    # original file; the redundant second definition has been removed.
    hostname = urlrewrite.get_hostname(wb.root.url)
    filename = urlrewrite.hostname_to_filename(hostname)
    io.write_err("Saving session to %s ..." % shcolor.color(shcolor.YELLOW, filename + ".{web,session}"))
    io.serialize(wb, filename + ".web", dir=io.LOGDIR)
    if queue:
        io.serialize(queue, filename + ".session", dir=io.LOGDIR)
    # only web being saved, ie. spidering complete, remove old session
    elif io.file_exists(filename + ".session", dir=io.LOGDIR):
        io.delete(filename + ".session", dir=io.LOGDIR)
    io.write_err(shcolor.color(shcolor.GREEN, "done\n"))
a("--trace", metavar="<url>", help="Trace path from root to <url>") a("--deepest", action="store_true", help="Trace url furthest from root") a("--popular", action="store_true", help="Find the most referenced urls") a("--test", action="store_true", help="Run trace loop test") (opts, args) = io.parse_args(parser) try: if opts.test: wb = Web() wb.root = Node("a") wb.index["a"] = wb.root wb.index["b"] = Node("b") wb.index["c"] = Node("c") #wb.index["b"].incoming["a"] = wb.root # cut link from a to b wb.index["b"].incoming["c"] = wb.index["c"] # create loop b <-> c wb.index["c"].incoming["b"] = wb.index["b"] io.serialize(wb, "web") wb = io.deserialize("web") print "Root :", wb.root.url print "Index:", wb.index print "b.in :", wb.index['b'].incoming print "c.in :", wb.index['c'].incoming wb.print_trace(wb.get_trace("c")) # inf loop if loop not detected sys.exit() wb = io.deserialize(args[0]) if opts.dump: wb.dump() elif opts.into or opts.out: wb.print_refs((opts.into or opts.out), opts.out) elif opts.aliases: wb.print_aliases(opts.aliases)