parser.print_help() sys.exit(1) num_threads = AG_args.get_num_threads_or_die( args ) num_retries = AG_args.get_max_retries_or_die( args ) blacklists, whitelists = AG_acl.load_blacklists_and_whitelists( args.blacklists, args.whitelists ) # make the hierarchy log.info("crawl %s" % "ftp://" + args.hostname[0] + args.root_dir ) ftp_include_callback = lambda path, is_directory: AG_acl.include_in_listing( path, is_directory, blacklists, whitelists ) ftp_specfile_callbacks = AG_specfile.specfile_callbacks( file_reval_sec_cb = lambda path: args.reval_sec, dir_reval_sec_cb = lambda path: args.reval_sec, file_perm_cb = lambda path: file_perm, dir_perm_cb = lambda path: dir_perm, query_string_cb = lambda path: "ftp://" + args.hostname[0] + os.path.join( args.root_dir, path.strip("/") ) ) hierarchy = AG_ftp.build_hierarchy( args.hostname[0], args.root_dir, ftp_include_callback, ftp_specfile_callbacks, num_threads = num_threads, ftp_username = args.username, ftp_password = args.password, max_retries = max_retries, allow_partial_failure = (not args.fail_fast) ) if hierarchy is not None: specfile_text = AG_specfile.generate_specfile( {}, hierarchy ) print specfile_text
listing_path = args.listing_path except: listing_path = None blacklists, whitelists = AG_acl.load_blacklists_and_whitelists( args.blacklists, args.whitelists) # make the hierarchy log.info("crawl gs://m-lab%s" % args.root_dir) mlab_include_callback = lambda path, is_directory: AG_acl.include_in_listing( path, is_directory, blacklists, whitelists) mlab_specfile_callbacks = AG_specfile.specfile_callbacks( file_reval_sec_cb=lambda path: args.reval_sec, dir_reval_sec_cb=lambda path: args.reval_sec, file_perm_cb=lambda path: file_perm, dir_perm_cb=lambda path: dir_perm, query_string_cb=lambda path: gsutil_binary_path + " cat " + AG_mlab. GSUTIL_PROTOCOL + AG_mlab.GSUTIL_ROOT + os.path.join( args.root_dir, path.strip("/"))) AG_mlab.generate_specfile_from_global_listing( gsutil_binary_path, args.root_dir, mlab_include_callback, mlab_specfile_callbacks, sys.stdout, max_retries=max_retries, compressed_listing_path=listing_path)
parser.print_help() sys.exit(1) num_threads = AG_args.get_num_threads_or_die( args ) max_retries = AG_args.get_num_retries_or_die( args ) try: gsutil_binary_path = args.gsutil_path except: gsutil_binary_path = GSUTIL_BINARY_PATH try: listing_path = args.listing_path except: listing_path = None blacklists, whitelists = AG_acl.load_blacklists_and_whitelists( args.blacklists, args.whitelists ) # make the hierarchy log.info("crawl gs://m-lab%s" % args.root_dir ) mlab_include_callback = lambda path, is_directory: AG_acl.include_in_listing( path, is_directory, blacklists, whitelists ) mlab_specfile_callbacks = AG_specfile.specfile_callbacks( file_reval_sec_cb = lambda path: args.reval_sec, dir_reval_sec_cb = lambda path: args.reval_sec, file_perm_cb = lambda path: file_perm, dir_perm_cb = lambda path: dir_perm, query_string_cb = lambda path: gsutil_binary_path + " cat " + AG_mlab.GSUTIL_PROTOCOL + AG_mlab.GSUTIL_ROOT + os.path.join( args.root_dir, path.strip("/") ) ) AG_mlab.generate_specfile_from_global_listing( gsutil_binary_path, args.root_dir, mlab_include_callback, mlab_specfile_callbacks, sys.stdout, max_retries=max_retries, compressed_listing_path=listing_path )
# Crawl an FTP server and build its hierarchy.
num_threads = AG_args.get_num_threads_or_die(args)
max_retries = AG_args.get_num_retries_or_die(args)

# Lists consulted by the include callback below.
blacklists, whitelists = AG_acl.load_blacklists_and_whitelists(
    args.blacklists,
    args.whitelists)

# make the hierarchy
# The logged text is "crawl ftp://<host><root_dir>", exactly as before.
crawl_url = "ftp://" + args.hostname[0] + args.root_dir
log.info("crawl %s" % crawl_url)


def ftp_include_callback(path, is_directory):
    # Defer the include/exclude decision to the ACL module.
    return AG_acl.include_in_listing(
        path, is_directory, blacklists, whitelists)


# Per-path callbacks: revalidation interval, permission bits, and the ftp://
# query string (host plus the slash-stripped path) for each entry.
ftp_specfile_callbacks = AG_specfile.specfile_callbacks(
    file_reval_sec_cb=lambda path: args.reval_sec,
    dir_reval_sec_cb=lambda path: args.reval_sec,
    file_perm_cb=lambda path: file_perm,
    dir_perm_cb=lambda path: dir_perm,
    query_string_cb=lambda path: (
        "ftp://" + args.hostname[0] + "/" + path.strip("/")))

hierarchy = AG_ftp.build_hierarchy(
    args.hostname[0],
    args.root_dir,
    ftp_include_callback,
    ftp_specfile_callbacks,
    num_threads=num_threads,
    ftp_username=args.username,
    ftp_password=args.password,
    max_retries=max_retries,
    allow_partial_failure=(not args.fail_fast))
file_perm = 0 dir_perm = 0 try: file_perm = int( args.file_perm, 8 ) dir_perm = int( args.dir_perm, 8 ) except: print >> sys.stderr, "ERROR: invalid permission string" parser.print_help() sys.exit(1) num_threads = AG_args.get_num_threads_or_die( args ) max_retries = AG_args.get_num_retries_or_die( args ) blacklists, whitelists = AG_acl.load_blacklists_and_whitelists( args.blacklists, args.whitelists ) disk_include_callback = lambda path, is_directory: AG_acl.include_in_listing( path, is_directory, blacklists, whitelists ) disk_specfile_callbacks = AG_specfile.specfile_callbacks( file_reval_sec_cb = lambda path: args.reval_sec, dir_reval_sec_cb = lambda path: args.reval_sec, file_perm_cb = lambda path: file_perm, dir_perm_cb = lambda path: dir_perm, query_string_cb = lambda path: "/" + os.path.join( args.root[0].strip("/"), path.strip("/")) ) hierarchy = AG_disk.build_hierarchy( args.root[0], disk_include_callback, disk_specfile_callbacks, num_threads=num_threads, max_retries=max_retries ) if hierarchy is not None: specfile_text = AG_specfile.generate_specfile( {}, hierarchy ) print specfile_text
parser.print_help() sys.exit(1) num_threads = AG_args.get_num_threads_or_die(args) max_retries = AG_args.get_num_retries_or_die(args) blacklists, whitelists = AG_acl.load_blacklists_and_whitelists( args.blacklists, args.whitelists) disk_include_callback = lambda path, is_directory: AG_acl.include_in_listing( path, is_directory, blacklists, whitelists) disk_specfile_callbacks = AG_specfile.specfile_callbacks( file_reval_sec_cb=lambda path: args.reval_sec, dir_reval_sec_cb=lambda path: args.reval_sec, file_perm_cb=lambda path: file_perm, dir_perm_cb=lambda path: dir_perm, query_string_cb=lambda path: "/" + os.path.join( args.root[0].strip("/"), path.strip("/"))) hierarchy = AG_disk.build_hierarchy(args.root[0], disk_include_callback, disk_specfile_callbacks, num_threads=num_threads, max_retries=max_retries) if hierarchy is not None: specfile_text = AG_specfile.generate_specfile({}, hierarchy) print specfile_text