def logerror(path):
    io.savelog("Path failed: %s\n" % path, "error_dumpstream")


def main():
    try:
        line = sys.stdin.readline()
        while line:
            path = line.strip()
            filename = os.path.basename(path)
            # dump the stream at <path> into a local file named after it
            args = ["mplayer", "-dumpstream", "-dumpfile", filename, path]
            retval = subprocess.call(args)
            if retval:
                # non-zero exit status from mplayer: record the failed path
                logerror(path)
            line = sys.stdin.readline()
    except KeyboardInterrupt:
        io.write_abort()
    except Exception, e:
        # log the full traceback along with the mplayer invocation that failed
        s = "%s\n" % traceback.format_exc()
        s += "%s\n" % str(e)
        s += "Invocation string: %s\n" % str(args)
        io.write_err(s)


if __name__ == "__main__":
    (parser, a) = io.init_opts("< <file>")
    (opts, args) = io.parse_args(parser)
    main()
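# Note: the usage string "< <file>" above suggests the script reads stream
# paths from standard input, one per line. A minimal, hypothetical driver
# sketch (the filename dumpstream.py is assumed; this excerpt does not name
# the script, and "streams.txt" is an invented example input):
#
#   import subprocess
#
#   with open("streams.txt") as paths:   # one stream path per line
#       subprocess.call(["python", "dumpstream.py"], stdin=paths)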
                node.outgoing[n] = None
        #for node in self.index.values():
        #    print node.incoming
        #    print node.outgoing

    def _from_pickle(self):
        # after unpickling, point each url key back at its Node object in the index
        for node in self.index.values():
            for n in node.incoming:
                node.incoming[n] = self.index[n]
            for n in node.outgoing:
                node.outgoing[n] = self.index[n]


if __name__ == "__main__":
    (parser, a) = io.init_opts("<web> [options]")
    a("--dump", action="store_true", help="Dump all urls in web")
    a("--in", metavar="<url>", dest="into", help="Find incoming urls to <url>")
    a("--out", metavar="<url>", help="Find outgoing urls from <url>")
    a("--aliases", metavar="<url>", help="Find other urls for the document at <url>")
    a("--multiple", action="store_true", help="Find documents with multiple urls")
    a("--trace", metavar="<url>", help="Trace path from root to <url>")
    a("--deepest", action="store_true", help="Trace url furthest from root")
    a("--popular", action="store_true", help="Find the most referenced urls")
    a("--test", action="store_true", help="Run trace loop test")
    (opts, args) = io.parse_args(parser)
    try:
        if opts.test:
            wb = Web()
            wb.root = Node("a")
            wb.index["a"] = wb.root
        if len(stack) > 1:  # more than one layer of color
            col_bold = True
        if len(stack) > 0:  # at least one layer
            col = stack[-1:].pop()
            str_fmt += str[cursor:pos] + shcolor.code(col, bold=col_bold)
            cursor = pos
    str_fmt += str[cursor:-1]
    return str_fmt


if __name__ == "__main__":
    (parser, a) = io.init_opts("[ <url|file> [options] | --test ]")
    a("--dump", action="store_true", help="Dump urls")
    a("--test", action="store_true", help="Run spider testsuite")
    (opts, args) = io.parse_args(parser)
    try:
        url = None
        if opts.test:
            data = testcases
        else:
            url = args[0]
            data = urllib.urlopen(url).read()
        if opts.dump:
            for u in unique(unbox_it_to_ss(findall(data, url))):
                print u
        else:
        while queue:
            if depth > 0:
                depth -= 1
            elif depth == 0:
                # There may still be records in the queue, but since depth is reached
                # no more spidering is allowed, so we allow one more iteration, but
                # only for fetching
                queue, outer_queue = split_queue(queue, rules.index(rule) == len(rules)-1)

            queue = process_records(queue, rule, wb)

    save_session(wb)


if __name__ == "__main__":
    (parser, a) = io.init_opts("<url> ['<pattern>'] [options]")
    a("--recipe", metavar="<recipe>", dest="recipe", help="Use a spidering recipe")
    a("--fetch", action="store_true", help="Fetch urls, don't dump")
    a("--dump", action="store_true", help="Dump urls, don't fetch")
    a("--host", action="store_true", help="Only spider this host")
    a("--depth", type="int", metavar="<depth>", dest="depth", help="Spider to this depth")
    (opts, args) = io.parse_args(parser)
    try:
        # runtime options are handed to the spider through environment variables
        if opts.fetch:
            os.environ["FETCH_ALL"] = "1"
        elif opts.dump:
            os.environ["DUMP_ALL"] = "1"
        if opts.host:
            os.environ["HOST_FILTER"] = "1"
        if opts.depth:
            os.environ["DEPTH"] = str(opts.depth)