def download_blob(bucket_name, source_blob_name, destination_file_name, credentials):
    storage_client = storage.Client.from_service_account_json(credentials)
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(source_blob_name)
    blob.download_to_filename(destination_file_name)
    print("File {} downloaded to {}.".format(source_blob_name, destination_file_name))
    blob_check = bucket.get_blob(source_blob_name)
    blob_md5 = blob_check.md5_hash
    destination_md5 = md5_check.md5_file(destination_file_name)
    assert (blob_md5 == destination_md5), 'MD5 checksums do not match!!'
    print("MD5 checksums match for {}.".format(destination_file_name))
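# The checksum comparisons above and below go through a small md5_check module
# that is not reproduced here. A minimal sketch of what md5_check.py is assumed
# to contain: hash the file in chunks and return the base64-encoded digest,
# since Cloud Storage's blob.md5_hash (compared against it above) is the
# base64-encoded MD5 of the object rather than a hex string. This is an
# illustrative assumption, not the project's actual module.
import base64
import hashlib

def md5_file(path, block_size=2 ** 20):
    md5 = hashlib.md5()
    with open(path, 'rb') as fh:
        # read in blocks so large sequencing files do not have to fit in memory
        for chunk in iter(lambda: fh.read(block_size), b''):
            md5.update(chunk)
    return base64.b64encode(md5.digest()).decode('utf-8')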
def writeFromGDrive(file_id, file_name, dest, md5):
    # stream the Drive file to disk in chunks
    request = SERVICE.files().get_media(fileId=file_id)
    out_path = os.path.join(dest, file_name)
    fh = io.FileIO(out_path, 'wb')
    downloader = MediaIoBaseDownload(fh, request)
    done = False
    while done is False:
        try:
            status, done = downloader.next_chunk()
        except Exception:
            # clean up the partial file and stop the download loop
            fh.close()
            os.remove(out_path)
            print('ERROR downloading file: ' + file_name)
            return
        print(f'\rDownload {int(status.progress() * 100)}%.', end='')
    fh.close()
    print('\n', flush=True)
    # verify the local copy against the MD5 recorded for the Drive file
    dest_md5 = md5_check.md5_file(out_path)
    if dest_md5 == md5:
        print('md5 checks out for ' + file_name)
    else:
        print('md5 error - something corrupted for ' + file_name)
def upload_blob(bucket_name, source_file_name, destination_blob_name, credentials):
    storage_client = storage.Client.from_service_account_json(credentials)
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(destination_blob_name)
    blob.upload_from_filename(source_file_name)
    print("File {} uploaded to {}.".format(source_file_name, destination_blob_name))
    blob_check = bucket.get_blob(destination_blob_name)
    blob_md5 = blob_check.md5_hash
    source_md5 = md5_check.md5_file(source_file_name)
    if blob_md5 == source_md5:
        print("MD5 checksums match for {}.".format(destination_blob_name))
        return True
    else:
        print('MD5 checksums do not match!!')
        return False
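# Example of how the two Cloud Storage helpers above might be wired together.
# The bucket name, key file, and paths below are placeholders, not values from
# the actual project:
#
#   if upload_blob('example-bucket', './run1/sample.fastq.gz',
#                  'Raw_fastq/sample.fastq.gz', 'service_account.json'):
#       download_blob('example-bucket', 'Raw_fastq/sample.fastq.gz',
#                     './download/sample.fastq.gz', 'service_account.json')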
def uploader(upload_dirs, upparent_folder=PARENT_FOLDER):
    for item in upload_dirs:
        item_base = item.rstrip('/')
        item = item_base.split('/')[-1]
        # create the matching Drive folder if it does not exist yet
        existing = folderInGDrive(item, upparent_folder)
        if existing == False:
            folder_id = createGDriveFolder(item, upparent_folder)
            print('New Folder', item, folder_id)
            new_parent = folder_id
        else:
            new_parent = existing[1]
        one_dir = []
        for dirpath, dirnames, filenames in os.walk(item_base, topdown=True):
            # add all directories one level down to a list to loop through next
            if dirpath.count(os.sep) - item_base.count(os.sep) == 1:
                one_dir.append(dirpath)
            # remove all the files that aren't in the immediate directory
            if dirpath.count(os.sep) - item_base.count(os.sep) != 0:
                del dirnames[:]
                del filenames[:]
            # take just the remaining files and copy them on over
            for f in filenames:
                if fileInGDrive(f, new_parent) == False:
                    source_md5 = md5_check.md5_file(os.path.join(dirpath, f))
                    up_id = writeToGDrive(f, os.path.join(dirpath, f), new_parent, source_md5)
                    # the source MD5 is stored in the Drive file's description at upload time
                    up_md5 = SERVICE.files().get(
                        fileId=up_id, fields='description').execute()
                    if source_md5 != up_md5['description']:
                        print('ERROR IN UPLOAD ' + f)
                    else:
                        print('md5 uploaded correctly for ' + f)
        # re-loop to the next level of subdirectories
        uploader(one_dir, new_parent)
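# The Drive helpers that uploader() relies on (folderInGDrive, fileInGDrive,
# createGDriveFolder, writeToGDrive) are defined elsewhere in the project.
# The sketches below are assumptions inferred from how uploader() calls them:
# folderInGDrive returns False or (True, folder_id), fileInGDrive returns
# False or the file id, and writeToGDrive stores the source MD5 in the Drive
# file's description and returns the new file id. Treat them as illustrative,
# not as the project's real implementations.
from googleapiclient.http import MediaFileUpload

def folderInGDrive(name, parent):
    query = (f"name = '{name}' and '{parent}' in parents and "
             "mimeType = 'application/vnd.google-apps.folder' and trashed = false")
    hits = SERVICE.files().list(q=query, fields='files(id)').execute().get('files', [])
    return (True, hits[0]['id']) if hits else False

def fileInGDrive(name, parent):
    query = f"name = '{name}' and '{parent}' in parents and trashed = false"
    hits = SERVICE.files().list(q=query, fields='files(id)').execute().get('files', [])
    return hits[0]['id'] if hits else False

def createGDriveFolder(name, parent):
    body = {'name': name, 'parents': [parent],
            'mimeType': 'application/vnd.google-apps.folder'}
    return SERVICE.files().create(body=body, fields='id').execute()['id']

def writeToGDrive(name, path, parent, md5):
    # the local MD5 goes into the Drive 'description' field so it can be read
    # back after upload for verification
    body = {'name': name, 'parents': [parent], 'description': md5}
    media = MediaFileUpload(path, resumable=True)
    return SERVICE.files().create(body=body, media_body=media, fields='id').execute()['id']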
### Regular folder md5 check (no untar needed)
files_start = []
for dirpath, dirnames, filenames in os.walk(dir_start):
    for f in filenames:
        files_start.append(os.path.join(dirpath, f))

dir_end = './download'
files_end = []
for dirpath, dirnames, filenames in os.walk(dir_end):
    for f in filenames:
        files_end.append(os.path.join(dirpath, f))

for f_in in files_start:
    match_count = []
    for f_out in files_end:
        if os.path.basename(f_in) == os.path.basename(f_out):
            f_in_md5 = md5_check.md5_file(f_in)
            f_out_md5 = md5_check.md5_file(f_out)
            if f_in_md5 == f_out_md5:
                match_count.append('md5 match for ' + os.path.basename(f_in) +
                                   ' and ' + os.path.basename(f_out))
    if len(match_count) > 0:
        print('download md5 ok for ' + os.path.basename(f_in))
    else:
        print('ERROR! download md5 bad for ' + os.path.basename(f_in))

print('Download step completed')

#### untar and md5 check for BCL downloads
bcl_out = './download/Raw_bcl'
tar_out = './bcl_folders/down'
sub_tar_out = []