コード例 #1
0
ファイル: curate-ftp.py プロジェクト: fireshock199/syndicate
    parser.print_help()
    sys.exit(1)
 
 num_threads = AG_args.get_num_threads_or_die( args )
 num_retries = AG_args.get_max_retries_or_die( args )
    
 blacklists, whitelists = AG_acl.load_blacklists_and_whitelists( args.blacklists, args.whitelists )
 
 # make the hierarchy
 log.info("crawl %s" % "ftp://" + args.hostname[0] + args.root_dir )
 
 ftp_include_callback = lambda path, is_directory: AG_acl.include_in_listing( path, is_directory, blacklists, whitelists )
 
 ftp_specfile_callbacks = AG_specfile.specfile_callbacks( file_reval_sec_cb = lambda path: args.reval_sec,
                                                          dir_reval_sec_cb  = lambda path: args.reval_sec,
                                                          file_perm_cb      = lambda path: file_perm,
                                                          dir_perm_cb       = lambda path: dir_perm,
                                                          query_string_cb   = lambda path: "ftp://" + args.hostname[0] + os.path.join( args.root_dir, path.strip("/") ) )
 
 hierarchy = AG_ftp.build_hierarchy( args.hostname[0], args.root_dir, ftp_include_callback, ftp_specfile_callbacks, 
                                     num_threads           = num_threads,
                                     ftp_username          = args.username,
                                     ftp_password          = args.password,
                                     max_retries           = max_retries,
                                     allow_partial_failure = (not args.fail_fast) )
 
 if hierarchy is not None:
    specfile_text = AG_specfile.generate_specfile( {}, hierarchy )
 
    print specfile_text
    
コード例 #2
0
        listing_path = args.listing_path
    except:
        listing_path = None

    # Load the ACL lists named on the command line; the include callback
    # below hands them to AG_acl.include_in_listing.
    blacklists, whitelists = AG_acl.load_blacklists_and_whitelists(args.blacklists,
                                                                   args.whitelists)

    # make the hierarchy
    log.info("crawl gs://m-lab%s" % args.root_dir)

    def mlab_include_callback(path, is_directory):
        """Return AG_acl.include_in_listing's verdict for one listed entry."""
        return AG_acl.include_in_listing(path, is_directory, blacklists, whitelists)

    def _reval_sec(path):
        """Revalidation interval; the same value is used for every path."""
        return args.reval_sec

    def _file_perm(path):
        """Permission bits applied to every file entry."""
        return file_perm

    def _dir_perm(path):
        """Permission bits applied to every directory entry."""
        return dir_perm

    def _gsutil_cat_command(path):
        """Build the "<gsutil> cat <protocol><root><root_dir>/<path>" query string."""
        command_prefix = (gsutil_binary_path + " cat " +
                          AG_mlab.GSUTIL_PROTOCOL + AG_mlab.GSUTIL_ROOT)
        return command_prefix + os.path.join(args.root_dir, path.strip("/"))

    mlab_specfile_callbacks = AG_specfile.specfile_callbacks(
        file_reval_sec_cb=_reval_sec,
        dir_reval_sec_cb=_reval_sec,
        file_perm_cb=_file_perm,
        dir_perm_cb=_dir_perm,
        query_string_cb=_gsutil_cat_command)

    # The specfile is written to standard output by the AG_mlab helper.
    AG_mlab.generate_specfile_from_global_listing(
        gsutil_binary_path, args.root_dir,
        mlab_include_callback, mlab_specfile_callbacks,
        sys.stdout,
        max_retries=max_retries,
        compressed_listing_path=listing_path)
コード例 #3
0
ファイル: curate-mlab.py プロジェクト: etherparty/syndicate
    parser.print_help()
    sys.exit(1)
 
 # Read the worker-thread count and the retry budget from the parsed
 # arguments via the AG_args "_or_die" helpers (defined outside this section).
 num_threads = AG_args.get_num_threads_or_die( args )
 max_retries = AG_args.get_num_retries_or_die( args )

 # Optional arguments: fall back to a default only when the attribute is
 # absent from args.  Catching AttributeError instead of everything keeps
 # KeyboardInterrupt, SystemExit and unrelated errors from being swallowed.
 try:
    gsutil_binary_path = args.gsutil_path
 except AttributeError:
    gsutil_binary_path = GSUTIL_BINARY_PATH

 try:
    listing_path = args.listing_path
 except AttributeError:
    listing_path = None

 blacklists, whitelists = AG_acl.load_blacklists_and_whitelists( args.blacklists, args.whitelists )

 # make the hierarchy
 log.info("crawl gs://m-lab%s" % args.root_dir )

 # Listing filter: delegates to AG_acl.include_in_listing with the ACL lists
 # loaded above.
 mlab_include_callback = lambda path, is_directory: AG_acl.include_in_listing( path, is_directory, blacklists, whitelists )

 # Per-entry specfile callbacks.  Revalidation time and permission bits are the
 # same for every path; the query string is a "<gsutil> cat <url>" command for
 # the entry path joined under args.root_dir.
 mlab_specfile_callbacks = AG_specfile.specfile_callbacks( file_reval_sec_cb = lambda path: args.reval_sec,
                                                           dir_reval_sec_cb  = lambda path: args.reval_sec,
                                                           file_perm_cb      = lambda path: file_perm,
                                                           dir_perm_cb       = lambda path: dir_perm,
                                                           query_string_cb   = lambda path: gsutil_binary_path + " cat " + AG_mlab.GSUTIL_PROTOCOL + AG_mlab.GSUTIL_ROOT + os.path.join( args.root_dir, path.strip("/") ) )

 # The specfile is written to standard output by the AG_mlab helper.
 AG_mlab.generate_specfile_from_global_listing( gsutil_binary_path, args.root_dir, mlab_include_callback, mlab_specfile_callbacks, sys.stdout, max_retries=max_retries, compressed_listing_path=listing_path )
 
コード例 #4
0
    # Thread count first, then retry budget: both helpers are "_or_die"
    # calls, so their relative order is kept.
    num_threads = AG_args.get_num_threads_or_die(args)
    max_retries = AG_args.get_num_retries_or_die(args)

    blacklists, whitelists = AG_acl.load_blacklists_and_whitelists(args.blacklists,
                                                                   args.whitelists)

    # make the hierarchy
    # (% binds tighter than +; grouping the URL yields the same logged text)
    log.info("crawl %s" % ("ftp://" + args.hostname[0] + args.root_dir))

    def ftp_include_callback(path, is_directory):
        """Return AG_acl.include_in_listing's verdict for one listed entry."""
        return AG_acl.include_in_listing(path, is_directory, blacklists, whitelists)

    def _reval_sec(path):
        """Revalidation interval; the same value is used for every path."""
        return args.reval_sec

    def _file_perm(path):
        """Permission bits applied to every file entry."""
        return file_perm

    def _dir_perm(path):
        """Permission bits applied to every directory entry."""
        return dir_perm

    def _ftp_url(path):
        """Build "ftp://<host>/<path>" with the path's outer slashes removed."""
        return "ftp://" + args.hostname[0] + "/" + path.strip("/")

    ftp_specfile_callbacks = AG_specfile.specfile_callbacks(
        file_reval_sec_cb=_reval_sec,
        dir_reval_sec_cb=_reval_sec,
        file_perm_cb=_file_perm,
        dir_perm_cb=_dir_perm,
        query_string_cb=_ftp_url)

    # Crawl the server; partial failures are tolerated unless fail_fast is set.
    hierarchy = AG_ftp.build_hierarchy(
        args.hostname[0], args.root_dir,
        ftp_include_callback, ftp_specfile_callbacks,
        num_threads=num_threads,
        ftp_username=args.username,
        ftp_password=args.password,
        max_retries=max_retries,
        allow_partial_failure=(not args.fail_fast))
コード例 #5
0
ファイル: curate-disk.py プロジェクト: etherparty/syndicate
 file_perm = 0
 dir_perm = 0
 
 try:
    file_perm = int( args.file_perm, 8 )
    dir_perm = int( args.dir_perm, 8 )
 except:
    print >> sys.stderr, "ERROR: invalid permission string"
    parser.print_help()
    sys.exit(1)
    
 num_threads = AG_args.get_num_threads_or_die( args )
 max_retries = AG_args.get_num_retries_or_die( args )
 
 blacklists, whitelists = AG_acl.load_blacklists_and_whitelists( args.blacklists, args.whitelists )
 
 disk_include_callback = lambda path, is_directory: AG_acl.include_in_listing( path, is_directory, blacklists, whitelists )
 
 disk_specfile_callbacks = AG_specfile.specfile_callbacks( file_reval_sec_cb = lambda path: args.reval_sec,
                                                           dir_reval_sec_cb  = lambda path: args.reval_sec,
                                                           file_perm_cb      = lambda path: file_perm,
                                                           dir_perm_cb       = lambda path: dir_perm,
                                                           query_string_cb   = lambda path: "/" + os.path.join( args.root[0].strip("/"), path.strip("/")) )
 
 hierarchy = AG_disk.build_hierarchy( args.root[0], disk_include_callback, disk_specfile_callbacks, num_threads=num_threads, max_retries=max_retries )
 
 if hierarchy is not None:
    specfile_text = AG_specfile.generate_specfile( {}, hierarchy )
    
    print specfile_text
    
コード例 #6
0
        parser.print_help()
        sys.exit(1)

    num_threads = AG_args.get_num_threads_or_die(args)
    max_retries = AG_args.get_num_retries_or_die(args)

    blacklists, whitelists = AG_acl.load_blacklists_and_whitelists(
        args.blacklists, args.whitelists)

    disk_include_callback = lambda path, is_directory: AG_acl.include_in_listing(
        path, is_directory, blacklists, whitelists)

    disk_specfile_callbacks = AG_specfile.specfile_callbacks(
        file_reval_sec_cb=lambda path: args.reval_sec,
        dir_reval_sec_cb=lambda path: args.reval_sec,
        file_perm_cb=lambda path: file_perm,
        dir_perm_cb=lambda path: dir_perm,
        query_string_cb=lambda path: "/" + os.path.join(
            args.root[0].strip("/"), path.strip("/")))

    hierarchy = AG_disk.build_hierarchy(args.root[0],
                                        disk_include_callback,
                                        disk_specfile_callbacks,
                                        num_threads=num_threads,
                                        max_retries=max_retries)

    if hierarchy is not None:
        specfile_text = AG_specfile.generate_specfile({}, hierarchy)

        print specfile_text