Beispiel #1
0
def build_hierarchy(root_dir,
                    gsutil_binary_path,
                    include_cb,
                    gs_specfile_cbs,
                    max_retries=3,
                    num_threads=2,
                    allow_partial_failure=False):
    """
    Build the directory hierarchy rooted at root_dir and return it.

    Creates num_threads gsutil_context objects, each carrying
    gsutil_binary_path, and hands them to AG_crawl.build_hierarchy together
    with crawler callbacks assembled from include_cb, gsutil_listdir and
    gsutil_isdir.

    Arguments:
       root_dir              -- root passed through to AG_crawl.build_hierarchy
       gsutil_binary_path    -- stored on every context as gsutil_binary_path
       include_cb            -- forwarded as the crawler's include_cb
       gs_specfile_cbs       -- forwarded unchanged to AG_crawl.build_hierarchy
       max_retries           -- forwarded unchanged (default 3)
       num_threads           -- number of contexts to create (default 2);
                                presumably one per crawler thread -- confirm
                                against AG_crawl
       allow_partial_failure -- forwarded unchanged (default False)

    Returns whatever AG_crawl.build_hierarchy returns.
    """

    def _new_context():
        # every context only needs to know where the gsutil binary lives
        ctx = gsutil_context()
        ctx.gsutil_binary_path = gsutil_binary_path
        return ctx

    crawl_contexts = [_new_context() for _ in xrange(0, num_threads)]

    crawler_cbs = AG_crawl.crawler_callbacks(
        include_cb=include_cb,
        listdir_cb=gsutil_listdir,
        isdir_cb=gsutil_isdir)

    return AG_crawl.build_hierarchy(
        crawl_contexts,
        root_dir,
        DRIVER_NAME,
        crawler_cbs,
        gs_specfile_cbs,
        allow_partial_failure=allow_partial_failure,
        max_retries=max_retries)
Beispiel #2
0
def build_hierarchy( root_dir, include_cb, disk_specfile_cbs, max_retries=1, num_threads=2, allow_partial_failure=False ):
   """
   Build the directory hierarchy via AG_crawl using the disk callbacks and return it.

   Note the argument layout of the AG_crawl.build_hierarchy call: the first
   argument is a list holding root_dir repeated num_threads times, and the
   crawl root passed as the second argument is always "/".
   (Presumably each list entry serves as one crawler thread's context, so
   paths are interpreted relative to root_dir -- confirm against AG_crawl
   and disk_listdir / disk_isdir.)

   Arguments:
      root_dir              -- value repeated num_threads times in the first argument
      include_cb            -- forwarded as the crawler's include_cb
      disk_specfile_cbs     -- forwarded unchanged to AG_crawl.build_hierarchy
      max_retries           -- forwarded unchanged (default 1)
      num_threads           -- number of root_dir copies to pass (default 2)
      allow_partial_failure -- forwarded unchanged (default False)

   Returns whatever AG_crawl.build_hierarchy returns.
   """

   per_thread_roots = [root_dir] * num_threads

   crawler_cbs = AG_crawl.crawler_callbacks( include_cb=include_cb,
                                             listdir_cb=disk_listdir,
                                             isdir_cb=disk_isdir )

   return AG_crawl.build_hierarchy( per_thread_roots,
                                    "/",
                                    DRIVER_NAME,
                                    crawler_cbs,
                                    disk_specfile_cbs,
                                    allow_partial_failure=allow_partial_failure,
                                    max_retries=max_retries )
Beispiel #3
0
def build_hierarchy( root_dir, gsutil_binary_path, include_cb, gs_specfile_cbs, max_retries=3, num_threads=2, allow_partial_failure=False ):   
   """
   Build the directory hierarchy rooted at root_dir and return it.

   Creates num_threads gsutil_context objects, each carrying
   gsutil_binary_path, and hands them to AG_crawl.build_hierarchy together
   with crawler callbacks assembled from include_cb, gsutil_listdir and
   gsutil_isdir.

   Arguments:
      root_dir              -- root passed through to AG_crawl.build_hierarchy
      gsutil_binary_path    -- stored on every context as gsutil_binary_path
      include_cb            -- forwarded as the crawler's include_cb
      gs_specfile_cbs       -- forwarded unchanged to AG_crawl.build_hierarchy
      max_retries           -- forwarded unchanged (default 3)
      num_threads           -- number of contexts to create (default 2);
                               presumably one per crawler thread -- confirm
                               against AG_crawl
      allow_partial_failure -- forwarded unchanged (default False)

   Returns whatever AG_crawl.build_hierarchy returns.
   """

   def _new_context():
      # every context only needs to know where the gsutil binary lives
      ctx = gsutil_context()
      ctx.gsutil_binary_path = gsutil_binary_path
      return ctx

   crawl_contexts = [ _new_context() for _ in xrange(0, num_threads) ]

   crawler_cbs = AG_crawl.crawler_callbacks( include_cb=include_cb,
                                             listdir_cb=gsutil_listdir,
                                             isdir_cb=gsutil_isdir )

   return AG_crawl.build_hierarchy( crawl_contexts,
                                    root_dir,
                                    DRIVER_NAME,
                                    crawler_cbs,
                                    gs_specfile_cbs,
                                    allow_partial_failure=allow_partial_failure,
                                    max_retries=max_retries )
Beispiel #4
0
def build_hierarchy(root_dir,
                    include_cb,
                    disk_specfile_cbs,
                    max_retries=1,
                    num_threads=2,
                    allow_partial_failure=False):
    """
    Build the directory hierarchy via AG_crawl using the disk callbacks and
    return it.

    Note the argument layout of the AG_crawl.build_hierarchy call: the first
    argument is a list holding root_dir repeated num_threads times, and the
    crawl root passed as the second argument is always "/".
    (Presumably each list entry serves as one crawler thread's context, so
    paths are interpreted relative to root_dir -- confirm against AG_crawl
    and disk_listdir / disk_isdir.)

    Arguments:
       root_dir              -- value repeated num_threads times in the
                                first argument
       include_cb            -- forwarded as the crawler's include_cb
       disk_specfile_cbs     -- forwarded unchanged to
                                AG_crawl.build_hierarchy
       max_retries           -- forwarded unchanged (default 1)
       num_threads           -- number of root_dir copies to pass (default 2)
       allow_partial_failure -- forwarded unchanged (default False)

    Returns whatever AG_crawl.build_hierarchy returns.
    """

    per_thread_roots = [root_dir] * num_threads

    crawler_cbs = AG_crawl.crawler_callbacks(
        include_cb=include_cb,
        listdir_cb=disk_listdir,
        isdir_cb=disk_isdir)

    return AG_crawl.build_hierarchy(
        per_thread_roots,
        "/",
        DRIVER_NAME,
        crawler_cbs,
        disk_specfile_cbs,
        allow_partial_failure=allow_partial_failure,
        max_retries=max_retries)
Beispiel #5
0
def build_hierarchy( hostname, root_dir, include_cb, ftp_specfile_cbs, num_threads=2, max_retries=3, ftp_username="******", ftp_password="", allow_partial_failure=False ):
   """
   Build the directory hierarchy rooted at root_dir on an FTP server and return it.

   Opens num_threads ftputil.FTPHost connections to hostname, hands the pool
   to AG_crawl.build_hierarchy together with crawler callbacks assembled from
   include_cb, ftp_listdir and ftp_isdir, and closes every opened connection
   before returning -- including when connecting or crawling raises.

   Arguments:
      hostname              -- FTP server passed to ftputil.FTPHost
      root_dir              -- root passed through to AG_crawl.build_hierarchy
      include_cb            -- forwarded as the crawler's include_cb
      ftp_specfile_cbs      -- forwarded unchanged to AG_crawl.build_hierarchy
      num_threads           -- number of FTP connections to open (default 2)
      max_retries           -- forwarded unchanged (default 3)
      ftp_username          -- login name passed to ftputil.FTPHost
                               NOTE(review): the default literal looks like a
                               redacted placeholder -- confirm intended value
      ftp_password          -- password passed to ftputil.FTPHost (default "")
      allow_partial_failure -- forwarded unchanged (default False)

   Returns whatever AG_crawl.build_hierarchy returns.  Any exception raised
   while connecting or crawling propagates after the connections are closed.
   """

   ftphost_pool = []

   try:
      for _ in xrange(0, num_threads):
         ftphost = ftputil.FTPHost( hostname, ftp_username, ftp_password )

         # track the connection right away, so it gets closed even if the
         # setup calls below fail
         ftphost_pool.append( ftphost )

         # big cache
         ftphost.stat_cache.resize( 50000 )
         ftphost.keep_alive()

      ftp_crawler_cbs = AG_crawl.crawler_callbacks( include_cb=include_cb,
                                                    listdir_cb=ftp_listdir,
                                                    isdir_cb=ftp_isdir )

      hierarchy = AG_crawl.build_hierarchy( ftphost_pool, root_dir, DRIVER_NAME, ftp_crawler_cbs, ftp_specfile_cbs, allow_partial_failure=allow_partial_failure, max_retries=max_retries )

   finally:
      # always release the connections, on success and on failure alike
      for ftphost in ftphost_pool:
         ftphost.close()

   return hierarchy
Beispiel #6
0
def build_hierarchy(hostname,
                    root_dir,
                    include_cb,
                    ftp_specfile_cbs,
                    num_threads=2,
                    max_retries=3,
                    ftp_username="******",
                    ftp_password="",
                    allow_partial_failure=False):
    """
    Build the directory hierarchy rooted at root_dir on an FTP server and
    return it.

    Opens num_threads ftputil.FTPHost connections to hostname, hands the pool
    to AG_crawl.build_hierarchy together with crawler callbacks assembled
    from include_cb, ftp_listdir and ftp_isdir, and closes every opened
    connection before returning -- including when connecting or crawling
    raises.

    Arguments:
       hostname              -- FTP server passed to ftputil.FTPHost
       root_dir              -- root passed through to
                                AG_crawl.build_hierarchy
       include_cb            -- forwarded as the crawler's include_cb
       ftp_specfile_cbs      -- forwarded unchanged to
                                AG_crawl.build_hierarchy
       num_threads           -- number of FTP connections to open (default 2)
       max_retries           -- forwarded unchanged (default 3)
       ftp_username          -- login name passed to ftputil.FTPHost
                                NOTE(review): the default literal looks like
                                a redacted placeholder -- confirm intended
                                value
       ftp_password          -- password passed to ftputil.FTPHost
                                (default "")
       allow_partial_failure -- forwarded unchanged (default False)

    Returns whatever AG_crawl.build_hierarchy returns.  Any exception raised
    while connecting or crawling propagates after the connections are closed.
    """

    ftphost_pool = []

    try:
        for _ in xrange(0, num_threads):
            ftphost = ftputil.FTPHost(hostname, ftp_username, ftp_password)

            # track the connection right away, so it gets closed even if the
            # setup calls below fail
            ftphost_pool.append(ftphost)

            # big cache
            ftphost.stat_cache.resize(50000)
            ftphost.keep_alive()

        ftp_crawler_cbs = AG_crawl.crawler_callbacks(
            include_cb=include_cb,
            listdir_cb=ftp_listdir,
            isdir_cb=ftp_isdir)

        hierarchy = AG_crawl.build_hierarchy(
            ftphost_pool,
            root_dir,
            DRIVER_NAME,
            ftp_crawler_cbs,
            ftp_specfile_cbs,
            allow_partial_failure=allow_partial_failure,
            max_retries=max_retries)

    finally:
        # always release the connections, on success and on failure alike
        for ftphost in ftphost_pool:
            ftphost.close()

    return hierarchy