Beispiel #1
0
def save_session(wb, queue=None):
    hostname = urlrewrite.get_hostname(wb.root.url)
    filename = urlrewrite.hostname_to_filename(hostname)
    io.write_err("Saving session to %s ..." %
         shcolor.color(shcolor.YELLOW, filename+".{web,session}"))
    io.serialize(wb, filename + ".web", dir=io.LOGDIR)
    if queue: 
        io.serialize(queue, filename + ".session", dir=io.LOGDIR)
    # only web being saved, ie. spidering complete, remove old session
    elif io.file_exists(filename + ".session", dir=io.LOGDIR):
        io.delete(filename + ".session", dir=io.LOGDIR)
    io.write_err(shcolor.color(shcolor.GREEN, "done\n"))
Beispiel #2
0
def save_session(wb, queue=None):
    hostname = urlrewrite.get_hostname(wb.root.url)
    filename = urlrewrite.hostname_to_filename(hostname)
    io.write_err("Saving session to %s ..." %
                 shcolor.color(shcolor.YELLOW, filename + ".{web,session}"))
    io.serialize(wb, filename + ".web", dir=io.LOGDIR)
    if queue:
        io.serialize(queue, filename + ".session", dir=io.LOGDIR)
    # only web being saved, ie. spidering complete, remove old session
    elif io.file_exists(filename + ".session", dir=io.LOGDIR):
        io.delete(filename + ".session", dir=io.LOGDIR)
    io.write_err(shcolor.color(shcolor.GREEN, "done\n"))
Beispiel #3
0
def restore_session(url):
    hostname = urlrewrite.get_hostname(url)
    filename = urlrewrite.hostname_to_filename(hostname)
    q, wb = None, None
    if (io.file_exists(filename + ".web", dir=io.LOGDIR)):
        io.write_err("Restoring web from %s ..." %
             shcolor.color(shcolor.YELLOW, filename+".web"))
        wb = io.deserialize(filename + ".web", dir=io.LOGDIR)
        io.write_err(shcolor.color(shcolor.GREEN, "done\n"))
    if (io.file_exists(filename + ".session", dir=io.LOGDIR)):
        io.write_err("Restoring session from %s ..." %
             shcolor.color(shcolor.YELLOW, filename+".session"))
        q = io.deserialize(filename + ".session", dir=io.LOGDIR)
        q = recipe.overrule_records(q)
        io.write_err(shcolor.color(shcolor.GREEN, "done\n"))
    return q, wb
Beispiel #4
0
def restore_session(url):
    hostname = urlrewrite.get_hostname(url)
    filename = urlrewrite.hostname_to_filename(hostname)
    q, wb = None, None
    if (io.file_exists(filename + ".web", dir=io.LOGDIR)):
        io.write_err("Restoring web from %s ..." %
                     shcolor.color(shcolor.YELLOW, filename + ".web"))
        wb = io.deserialize(filename + ".web", dir=io.LOGDIR)
        io.write_err(shcolor.color(shcolor.GREEN, "done\n"))
    if (io.file_exists(filename + ".session", dir=io.LOGDIR)):
        io.write_err("Restoring session from %s ..." %
                     shcolor.color(shcolor.YELLOW, filename + ".session"))
        q = io.deserialize(filename + ".session", dir=io.LOGDIR)
        q = recipe.overrule_records(q)
        io.write_err(shcolor.color(shcolor.GREEN, "done\n"))
    return q, wb
Beispiel #5
0
 def assert_in_web(self, url):
     if url not in self.index:
         io.write_err("Url %s not in the web\n" %
                      shcolor.color(shcolor.YELLOW, url))
         sys.exit(1)
Beispiel #6
0
 def assert_in_web(self, url):
     if url not in self.index:
         io.write_err("Url %s not in the web\n" %
                      shcolor.color(shcolor.YELLOW, url))
         sys.exit(1)
Beispiel #7
0
def write_abort():
    write_err("\n%s\n" % shcolor.color(shcolor.RED, "User aborted"))
Beispiel #8
0
    a("--fetch", action="store_true", help="Fetch urls, don't dump")
    a("--dump", action="store_true", help="Dump urls, don't fetch")
    a("--host", action="store_true", help="Only spider this host")
    a("--depth", type="int", metavar="<depth>", dest="depth", help="Spider to this depth")
    (opts, args) = io.parse_args(parser)
    try:
        if opts.fetch:
            os.environ["FETCH_ALL"] = "1"
        elif opts.dump:
            os.environ["DUMP_ALL"] = "1"
        if opts.host:
            os.environ["HOST_FILTER"] = "1"
        if opts.depth:
            os.environ["DEPTH"] = str(opts.depth)

        url = args[0]
        (q, w) = restore_session(url)
        if opts.recipe:
            rules = recipe.load_recipe(opts.recipe, url)
        else:
            pattern = args[1]
            rules = recipe.get_recipe(pattern, url)
        queue = q or recipe.get_queue(url, mode=fetch.Fetcher.SPIDER)
        wb = w or web.Web(url)
    except recipe.PatternError, e:
        io.write_err(shcolor.color(shcolor.RED, "%s\n" % e))
        sys.exit(1)
    except IndexError:
        io.opts_help(None, None, None, parser)
    main(queue, rules, wb)
Beispiel #9
0
      type="int",
      metavar="<depth>",
      dest="depth",
      help="Spider to this depth")
    (opts, args) = io.parse_args(parser)
    try:
        if opts.fetch:
            os.environ["FETCH_ALL"] = "1"
        elif opts.dump:
            os.environ["DUMP_ALL"] = "1"
        if opts.host:
            os.environ["HOST_FILTER"] = "1"
        if opts.depth:
            os.environ["DEPTH"] = str(opts.depth)

        url = args[0]
        (q, w) = restore_session(url)
        if opts.recipe:
            rules = recipe.load_recipe(opts.recipe, url)
        else:
            pattern = args[1]
            rules = recipe.get_recipe(pattern, url)
        queue = q or recipe.get_queue(url, mode=fetch.Fetcher.SPIDER)
        wb = w or web.Web(url)
    except recipe.PatternError, e:
        io.write_err(shcolor.color(shcolor.RED, "%s\n" % e))
        sys.exit(1)
    except IndexError:
        io.opts_help(None, None, None, parser)
    main(queue, rules, wb)