def download_seeds(the_file_object, the_file_basename, the_output_dir, the_number_threads, the_printing_depth):
    """Download all seeds of `the_file_object` into `the_output_dir`.

    The whole pass is repeated until the file object reports `sealed()`
    and the worker pool of the last pass reports `is_all_right()`.

    Seeds that already exist locally with a matching MD5 are skipped;
    local copies with a mismatching MD5 are deleted and scheduled again.

    Parameters:
      the_file_object    -- iterable of seed objects; also provides
                            `sealed()` and `located_file()`
      the_file_basename  -- not used by this function (kept for interface
                            compatibility)
      the_output_dir     -- directory the seed files are written to
      the_number_threads -- size passed to `WorkerPool`
      the_printing_depth -- depth passed to `print_d` / `print_traceback`

    Returns True once the loop terminates.
    """
    an_is_download_ok = False
    while not the_file_object.sealed() or not an_is_download_ok:
        a_worker_pool = WorkerPool(the_number_threads)
        try:
            for a_seed_object in the_file_object:
                a_hex_md5 = a_seed_object.hex_md5()
                a_seed_path = os.path.join(the_output_dir, a_seed_object.basename())

                if os.path.exists(a_seed_path):
                    # Close the handle before a possible os.remove(): an open
                    # handle leaks a descriptor per seed and blocks deletion
                    # on platforms that lock open files.
                    with open(a_seed_path, 'rb') as a_file_pointer:
                        a_md5 = compute_md5(a_file_pointer)[0]

                    if a_hex_md5 == a_md5:
                        continue  # valid local copy, nothing to download

                    os.remove(a_seed_path)

                print_d("a_seed_path = '%s'\n" % a_seed_path, the_printing_depth)
                a_worker_pool.charge(download_seed, (a_seed_object, a_seed_path, the_printing_depth + 1))
        except Exception:
            # Best effort: report the problem and let the outer loop retry.
            # KeyboardInterrupt / SystemExit are deliberately not swallowed.
            from cloudflu.common import print_traceback
            print_traceback(the_printing_depth)

        a_worker_pool.shutdown()
        an_is_download_ok = a_worker_pool.is_all_right()

        # NOTE: the message says "uploaded()" but the value printed is sealed().
        print_d("'%s'.uploaded() == %s\n" % (the_file_object.located_file(), the_file_object.sealed()), the_printing_depth)

    return True
def upload_file(the_worker_pool, the_file_path, the_file_location, the_study_object, the_upload_seed_size, the_printing_depth):
    """Pack `the_file_path` into a tar.gz, split it into seeds and register it.

    Steps performed:
      1. (re)create the working directory given by `generate_uploading_dir`;
      2. archive the file with `tar -czf` into a temporary file there;
      3. split the archive into pieces of `the_upload_seed_size` bytes named
         `<basename>.tgz-<numeric suffix>` inside the working directory;
      4. compute the MD5 of the whole archive, delete the temporary archive
         and create the `TFileObject` record with that MD5.

    Parameters:
      the_worker_pool      -- not used in this function body
      the_file_path        -- path of the file (or directory) to upload
      the_file_location    -- passed through to `TFileObject.create`
      the_study_object     -- passed through to `TFileObject.create`
      the_upload_seed_size -- size of one seed, in bytes
      the_printing_depth   -- depth passed to `print_d` / `sh_command`

    Returns None.
    """
    import math
    import shutil
    import tempfile

    a_working_dir = generate_uploading_dir(the_file_path)
    shutil.rmtree(a_working_dir, True)  # ignore errors: it may not exist yet
    os.makedirs(a_working_dir)
    print_d("a_working_dir = '%s'\n" % a_working_dir, the_printing_depth)

    a_file_dirname = os.path.dirname(the_file_path)
    a_file_basename = os.path.basename(the_file_path)

    # mkstemp() returns an already opened OS-level descriptor; only the path
    # is needed here (tar writes the file), so close it instead of leaking it.
    # NOTE: for FAT file systems the original author suggested creating the
    # temporary file in the default location (tempfile.mkstemp() without
    # `dir`) as a workaround.
    a_tmp_fd, a_tmp_file = tempfile.mkstemp(dir=a_working_dir)
    os.close(a_tmp_fd)

    # All paths are quoted so that names containing spaces survive the shell.
    sh_command("cd '%s' && tar -czf '%s' '%s'" % (a_file_dirname, a_tmp_file, a_file_basename), the_printing_depth)

    a_statinfo = os.stat(a_tmp_file)
    print_d("a_statinfo.st_size = %d, bytes\n" % a_statinfo.st_size, the_printing_depth)

    # Number of decimal digits needed to enumerate the seeds; 0 when the
    # archive is not larger than a single seed.
    a_suffix_length = math.log10(float(a_statinfo.st_size) / the_upload_seed_size)
    if a_suffix_length > 0:
        a_suffix_length = int(a_suffix_length + 1.0)
    else:
        a_suffix_length = 0
    print_d("a_suffix_length = %d, digits\n" % a_suffix_length, the_printing_depth)

    a_file_seed_target = os.path.join(a_working_dir, a_file_basename)
    sh_command("cat '%s' | split --bytes=%d --numeric-suffixes --suffix-length=%d - '%s.tgz-'" %
               (a_tmp_file, the_upload_seed_size, a_suffix_length, a_file_seed_target), the_printing_depth)

    # Only the hex digest (first item of the result) is needed.
    with open(a_tmp_file, 'rb') as a_file_pointer:
        a_hex_md5 = compute_md5(a_file_pointer)[0]
    os.remove(a_tmp_file)

    a_file_object = TFileObject.create(the_study_object, the_file_path, the_file_location, a_hex_md5)
    print_d("a_file_object = %s\n" % a_file_object, the_printing_depth)
def create(the_file_object, the_seed_name, the_seed_path):
    """Upload the seed file at `the_seed_path` and return its `TSeedObject`.

    The MD5 of the seed file is computed, a key named by
    `generate_seed_name` is obtained from the bucket of `the_file_object`,
    the file contents are uploaded to that key and, after a successful
    upload, the local seed file is deleted.

    Parameters:
      the_file_object -- owner of the seed; provides `api_version()` and
                         the `_bucket` used to obtain the key
      the_seed_name   -- logical name of the seed
      the_seed_path   -- path of the local seed file to upload

    Returns a `TSeedObject` built from the file object, the key, the seed
    name and the hex MD5 of the seed.

    Any exception raised while hashing or uploading propagates to the
    caller; in that case the local seed file is closed but NOT deleted.
    """
    from cloudflu.common import compute_md5

    # `with` guarantees the handle is released even when the upload fails.
    with open(the_seed_path, 'rb') as a_file_pointer:
        # Only the hex digest (first item of the result) is needed.
        a_hex_md5 = compute_md5(a_file_pointer)[0]

        an_api_version = the_file_object.api_version()
        a_seed_name = generate_seed_name(the_file_object, a_hex_md5, the_seed_name, an_api_version)
        a_seed_key = get_key(the_file_object._bucket, a_seed_name)

        # NOTE: passing the precomputed MD5 ( md5 = a_md5 ) to
        # set_contents_from_file was disabled by the original author with
        # the remark that "this method is not thread safe".
        # presumably compute_md5 leaves the file positioned at its start,
        # since the same handle is uploaded right after -- TODO confirm
        a_seed_key.set_contents_from_file(
            a_file_pointer,
            headers={'Content-Type': 'application/x-tar'},
            reduced_redundancy=True)  # to speed up the performance

    # Reached only after a successful upload.
    os.remove(the_seed_path)

    return TSeedObject(the_file_object, a_seed_key, the_seed_name, a_hex_md5)
def download_file(the_file_object, the_output_dir, the_number_threads, the_remove, the_fresh, the_callback):
    """Download, verify and unpack the file described by `the_file_object`.

    The seeds are fetched with `download_seeds`, concatenated into
    `<basename>.tgz`, and the archive MD5 is compared with
    `the_file_object.hex_md5()`; on mismatch the archive is deleted and the
    whole cycle is repeated. A valid archive is extracted into the output
    directory, after which the seeds and the archive are removed.

    Parameters:
      the_file_object    -- provides `hex_md5()`, `located_file()` and
                            `delete()`; passed to `download_seeds`
      the_output_dir     -- root directory for the downloaded data; the
                            directory part of `located_file()` is appended
      the_number_threads -- passed to `download_seeds` and `delete`
      the_remove         -- when equal to True, `the_file_object.delete` is
                            called once the target path exists locally
      the_fresh          -- when true, any previously downloaded result,
                            archive and seeds are removed first
      the_callback       -- optional callable invoked as
                            `the_callback( an_output_dir, a_located_file )`

    Returns True.
    """
    import os
    import os.path

    a_printing_depth = 0
    print_d("the_file_object = %s\n" % the_file_object, a_printing_depth)

    a_hex_md5 = the_file_object.hex_md5()
    a_located_file = the_file_object.located_file()

    a_file_dirname = os.path.dirname(a_located_file)
    a_file_basename = os.path.basename(a_located_file)

    an_output_dir = os.path.join(the_output_dir, a_file_dirname)
    print_d("an_output_dir = '%s'\n" % an_output_dir, a_printing_depth + 1)

    a_file_path = os.path.join(an_output_dir, a_file_basename)
    if the_fresh:
        # The glob must stay OUTSIDE the single quotes, otherwise the shell
        # looks for a file literally named "<path>.tgz*" and the stale
        # archive / seed files are never removed.
        sh_command("rm -fr '%s.tgz'*" % a_file_path, a_printing_depth + 1)
        sh_command("rm -fr '%s'" % a_file_path, a_printing_depth + 1)

    if not os.path.exists(an_output_dir):
        os.makedirs(an_output_dir)

    print_d("a_file_path = '%s'\n" % a_file_path, a_printing_depth + 2)

    if not os.path.exists(a_file_path):
        an_archive_name = "%s.tgz" % a_file_basename
        an_archive_path = os.path.join(an_output_dir, an_archive_name)

        while True:
            download_seeds(the_file_object, a_file_basename, an_output_dir, the_number_threads, a_printing_depth + 3)

            if not os.path.exists(an_archive_path):
                # Glue the seeds together; the name is quoted (glob outside
                # the quotes) so that names with spaces are handled.
                sh_command("cd '%s' && cat '%s'-* > '%s'" % (an_output_dir, an_archive_name, an_archive_name),
                           a_printing_depth + 1)

            # Close the archive before it may be removed below.
            with open(an_archive_path, 'rb') as an_archive_pointer:
                a_md5 = compute_md5(an_archive_pointer)[0]

            an_is_valid = (a_hex_md5 == a_md5)
            print_d("'%s' - %s\n" % (a_hex_md5, an_is_valid), a_printing_depth + 1)
            if an_is_valid:
                break

            # Corrupted archive: drop it and try the whole cycle again.
            os.remove(an_archive_path)

        sh_command("tar -xzf '%s' -C '%s'" % (an_archive_path, an_output_dir), a_printing_depth + 1)
        sh_command("cd '%s' && rm '%s'-*" % (an_output_dir, an_archive_name), a_printing_depth + 1)
        os.remove(an_archive_path)
    else:
        print_d("- nothing to be done, already downloaded\n", a_printing_depth + 1)

    # Strict comparison with True is kept on purpose so that the set of
    # values triggering the remote deletion does not change.
    if the_remove == True and os.path.exists(a_file_path):
        the_file_object.delete(the_number_threads, a_printing_depth + 1)

    if the_callback is not None:
        the_callback(an_output_dir, a_located_file)

    return True