def build_hierarchy(root_dir, gsutil_binary_path, include_cb, gs_specfile_cbs, max_retries=3, num_threads=2, allow_partial_failure=False):
    """
    Build the directory hierarchy under root_dir via gsutil and return it.

    num_threads gsutil_context objects are created, each with its
    gsutil_binary_path attribute set to the given path.  They are passed to
    AG_crawl.build_hierarchy along with root_dir, DRIVER_NAME, crawler
    callbacks built from include_cb / gsutil_listdir / gsutil_isdir, and
    gs_specfile_cbs.  max_retries and allow_partial_failure are forwarded
    unchanged.  The value returned by AG_crawl.build_hierarchy is returned
    as-is.
    """
    def _new_context():
        # one fresh context, pointed at the requested gsutil binary
        ctx = gsutil_context()
        ctx.gsutil_binary_path = gsutil_binary_path
        return ctx

    worker_contexts = [_new_context() for _ in xrange(num_threads)]

    crawler_cbs = AG_crawl.crawler_callbacks(
        include_cb=include_cb,
        listdir_cb=gsutil_listdir,
        isdir_cb=gsutil_isdir,
    )

    return AG_crawl.build_hierarchy(
        worker_contexts,
        root_dir,
        DRIVER_NAME,
        crawler_cbs,
        gs_specfile_cbs,
        allow_partial_failure=allow_partial_failure,
        max_retries=max_retries,
    )
def build_hierarchy( root_dir, include_cb, disk_specfile_cbs, max_retries=1, num_threads=2, allow_partial_failure=False ):
    """
    Build the on-disk directory hierarchy and return it.

    Crawler callbacks are built from include_cb / disk_listdir / disk_isdir.
    AG_crawl.build_hierarchy is then called with a list holding root_dir
    repeated num_threads times as its first argument and the literal "/" as
    its second argument, followed by DRIVER_NAME, the crawler callbacks and
    disk_specfile_cbs.  max_retries and allow_partial_failure are forwarded
    unchanged.  The value returned by AG_crawl.build_hierarchy is returned
    as-is.

    NOTE(review): presumably the crawl paths are interpreted relative to
    root_dir by disk_listdir / disk_isdir -- confirm against those helpers.
    """
    crawler_cbs = AG_crawl.crawler_callbacks(
        include_cb=include_cb,
        listdir_cb=disk_listdir,
        isdir_cb=disk_isdir
    )

    # one copy of root_dir per requested thread
    root_dir_copies = [root_dir] * num_threads

    result = AG_crawl.build_hierarchy(
        root_dir_copies,
        "/",
        DRIVER_NAME,
        crawler_cbs,
        disk_specfile_cbs,
        allow_partial_failure=allow_partial_failure,
        max_retries=max_retries
    )
    return result
def build_hierarchy( root_dir, gsutil_binary_path, include_cb, gs_specfile_cbs, max_retries=3, num_threads=2, allow_partial_failure=False ):
    """
    Build up the directory hierarchy and return it.

    Creates num_threads gsutil_context objects (each with
    gsutil_binary_path set on it) and hands them, together with root_dir,
    DRIVER_NAME, the gsutil crawler callbacks and gs_specfile_cbs, to
    AG_crawl.build_hierarchy.  max_retries and allow_partial_failure are
    passed through as keyword arguments.  Returns whatever
    AG_crawl.build_hierarchy returns.
    """
    gsutil_contexts = []
    for _ in xrange( num_threads ):
        ctx = gsutil_context()
        ctx.gsutil_binary_path = gsutil_binary_path
        gsutil_contexts.append( ctx )

    crawler_cbs = AG_crawl.crawler_callbacks( include_cb=include_cb, listdir_cb=gsutil_listdir, isdir_cb=gsutil_isdir )

    # options forwarded verbatim to the generic crawler
    crawl_options = {
        "allow_partial_failure": allow_partial_failure,
        "max_retries": max_retries,
    }

    return AG_crawl.build_hierarchy( gsutil_contexts, root_dir, DRIVER_NAME, crawler_cbs, gs_specfile_cbs, **crawl_options )
def build_hierarchy(root_dir, include_cb, disk_specfile_cbs, max_retries=1, num_threads=2, allow_partial_failure=False):
    """
    Crawl the local disk through AG_crawl and return the hierarchy.

    The first argument given to AG_crawl.build_hierarchy is a list of
    num_threads references to root_dir; the second is the literal "/".
    The remaining positional arguments are DRIVER_NAME, the crawler
    callbacks (include_cb, disk_listdir, disk_isdir) and disk_specfile_cbs.
    allow_partial_failure and max_retries are forwarded as keyword
    arguments.  The crawler's return value is returned unchanged.
    """
    callbacks = AG_crawl.crawler_callbacks(include_cb=include_cb,
                                           listdir_cb=disk_listdir,
                                           isdir_cb=disk_isdir)

    positional = (
        [root_dir] * num_threads,
        "/",
        DRIVER_NAME,
        callbacks,
        disk_specfile_cbs,
    )

    return AG_crawl.build_hierarchy(*positional,
                                    allow_partial_failure=allow_partial_failure,
                                    max_retries=max_retries)
def build_hierarchy( hostname, root_dir, include_cb, ftp_specfile_cbs, num_threads=2, max_retries=3, ftp_username="******", ftp_password="", allow_partial_failure=False ):
    """
    Build the directory hierarchy of an FTP server and return it.

    Opens num_threads ftputil.FTPHost connections to hostname (logging in
    with ftp_username / ftp_password), resizes each connection's stat cache
    to 50000 entries and calls keep_alive() on it.  The pool of connections
    is passed to AG_crawl.build_hierarchy together with root_dir,
    DRIVER_NAME, crawler callbacks built from include_cb / ftp_listdir /
    ftp_isdir, and ftp_specfile_cbs.  max_retries and allow_partial_failure
    are forwarded unchanged.

    Every connection opened here is closed before this function exits,
    including when connection set-up or the crawl raises; the exception
    then propagates to the caller.

    Returns whatever AG_crawl.build_hierarchy returns.

    NOTE(review): the default ftp_username literal looks like a redacted
    placeholder -- confirm the intended default.
    """
    ftphost_pool = []

    try:
        for _ in xrange( 0, num_threads ):
            ftphost = ftputil.FTPHost( hostname, ftp_username, ftp_password )

            # track the connection right away, so the finally block closes it
            # even if one of the set-up calls below fails
            ftphost_pool.append( ftphost )

            # big cache
            ftphost.stat_cache.resize( 50000 )
            ftphost.keep_alive()

        ftp_crawler_cbs = AG_crawl.crawler_callbacks( include_cb=include_cb, listdir_cb=ftp_listdir, isdir_cb=ftp_isdir )

        hierarchy = AG_crawl.build_hierarchy( ftphost_pool, root_dir, DRIVER_NAME, ftp_crawler_cbs, ftp_specfile_cbs, allow_partial_failure=allow_partial_failure, max_retries=max_retries )

    finally:
        # release the connections on both the success and the failure path
        for ftphost in ftphost_pool:
            ftphost.close()

    return hierarchy
def build_hierarchy(hostname, root_dir, include_cb, ftp_specfile_cbs, num_threads=2, max_retries=3, ftp_username="******", ftp_password="", allow_partial_failure=False):
    """
    Crawl an FTP server through AG_crawl and return the resulting hierarchy.

    num_threads ftputil.FTPHost connections to hostname are opened with
    ftp_username / ftp_password; each gets its stat cache resized to 50000
    entries and keep_alive() called on it.  The connection pool, root_dir,
    DRIVER_NAME, the FTP crawler callbacks (include_cb, ftp_listdir,
    ftp_isdir) and ftp_specfile_cbs are passed to AG_crawl.build_hierarchy,
    with max_retries and allow_partial_failure forwarded as keyword
    arguments.

    All connections opened by this call are closed on exit, whether the
    crawl succeeds or an exception is raised part-way through; exceptions
    are not swallowed.

    Returns the value produced by AG_crawl.build_hierarchy.

    NOTE(review): the default ftp_username literal looks like a redacted
    placeholder -- confirm the intended default.
    """
    ftphost_pool = []
    try:
        for _ in xrange(0, num_threads):
            ftphost = ftputil.FTPHost(hostname, ftp_username, ftp_password)
            # add to the pool before configuring it, so a failure below
            # cannot leave this connection open
            ftphost_pool.append(ftphost)

            # big cache
            ftphost.stat_cache.resize(50000)
            ftphost.keep_alive()

        ftp_crawler_cbs = AG_crawl.crawler_callbacks(include_cb=include_cb,
                                                     listdir_cb=ftp_listdir,
                                                     isdir_cb=ftp_isdir)

        return AG_crawl.build_hierarchy(ftphost_pool,
                                        root_dir,
                                        DRIVER_NAME,
                                        ftp_crawler_cbs,
                                        ftp_specfile_cbs,
                                        allow_partial_failure=allow_partial_failure,
                                        max_retries=max_retries)
    finally:
        # runs on normal return and on error alike
        for ftphost in ftphost_pool:
            ftphost.close()