def download_blob(bucket_name, source_blob_name, destination_file_name,
                  credentials):
    """Download a blob from a GCS bucket and verify its MD5 checksum.

    Args:
        bucket_name: name of the GCS bucket.
        source_blob_name: name of the blob to download.
        destination_file_name: local path to write the blob to.
        credentials: path to a service-account JSON key file.

    Raises:
        AssertionError: if the downloaded file's MD5 does not match the
            blob's ``md5_hash`` reported by GCS.
    """
    storage_client = storage.Client.from_service_account_json(credentials)
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(source_blob_name)

    blob.download_to_filename(destination_file_name)
    print("File {} downloaded to {}.".format(source_blob_name,
                                             destination_file_name))
    # Re-fetch the blob metadata: md5_hash is only populated on a blob
    # loaded from the server (get_blob), not on a locally built stub.
    blob_check = bucket.get_blob(source_blob_name)
    blob_md5 = blob_check.md5_hash
    destination_md5 = md5_check.md5_file(destination_file_name)

    # Explicit check instead of `assert`: asserts are stripped under
    # `python -O`, which would silently skip integrity verification.
    # AssertionError is kept so existing callers' except clauses still work.
    if blob_md5 != destination_md5:
        raise AssertionError('MD5 checksums do not match!!')
    print("MD5 checksums match for {}.".format(destination_file_name))
# Example #2
def writeFromGDrive(file_id, file_name, dest, md5):
    """Download a Google Drive file to ``dest`` and verify its MD5.

    Args:
        file_id: Drive file id passed to ``SERVICE.files().get_media``.
        file_name: name to give the downloaded file.
        dest: destination directory.
        md5: expected MD5 hex digest to compare against.
    """
    dest_path = os.path.join(dest, file_name)
    request = SERVICE.files().get_media(fileId=file_id)
    fh = io.FileIO(dest_path, 'wb')
    downloader = MediaIoBaseDownload(fh, request)
    try:
        done = False
        while not done:
            status, done = downloader.next_chunk()
            print(f'\rDownload {int(status.progress() * 100)}%.', end='')
    except Exception:
        # Original had a bare `except:` that referenced the undefined names
        # `location`/`filename` (NameError) and then looped forever because
        # `done` was never set. Close, remove the partial file, and bail out.
        fh.close()
        if os.path.exists(dest_path):
            os.remove(dest_path)
        print('ERROR downloading file: ' + file_name)
        return
    fh.close()  # original leaked the handle on the success path
    print('\n', flush=True)
    dest_md5 = md5_check.md5_file(dest_path)
    if dest_md5 == md5:
        print('md5 checks out for ' + file_name)
    else:
        print('md5 error- something corrupted for ' + file_name)
# Example #3
def upload_blob(bucket_name, source_file_name, destination_blob_name,
                credentials):
    """Upload a local file to a GCS bucket and verify the MD5 checksum.

    Args:
        bucket_name: name of the GCS bucket.
        source_file_name: local path of the file to upload.
        destination_blob_name: blob name to create in the bucket.
        credentials: path to a service-account JSON key file.

    Returns:
        True when the uploaded blob's ``md5_hash`` matches the local
        file's digest, False otherwise.
    """
    client = storage.Client.from_service_account_json(credentials)
    target_bucket = client.bucket(bucket_name)

    target_bucket.blob(destination_blob_name).upload_from_filename(
        source_file_name)
    print("File {} uploaded to {}.".format(source_file_name,
                                           destination_blob_name))

    # get_blob re-fetches the blob so server-side metadata (md5_hash)
    # is populated for the integrity check.
    remote_md5 = target_bucket.get_blob(destination_blob_name).md5_hash
    local_md5 = md5_check.md5_file(source_file_name)

    if remote_md5 != local_md5:
        print('MD5 checksums do not match!!')
        return False
    print("MD5 checksums match for {}.".format(destination_blob_name))
    return True
def uploader(upload_dirs, upparent_folder=PARENT_FOLDER):
    """Recursively mirror local directories into Google Drive with MD5 checks.

    For each directory in ``upload_dirs``: ensure a matching Drive folder
    exists under ``upparent_folder``, upload the directory's immediate
    files (verifying each upload's MD5 against the Drive file's
    ``description`` field), then recurse into the one-level-down
    subdirectories.

    Args:
        upload_dirs: iterable of local directory paths to upload.
        upparent_folder: Drive folder id to create/find folders under.
    """
    for item in upload_dirs:
        item_base = item.rstrip('/')
        item = item_base.split('/')[-1]

        # Call folderInGDrive once per item — the original called it up to
        # three times, and left `new_parent` unbound when the result was
        # neither False nor (True, id).
        existing = folderInGDrive(item, upparent_folder)
        if not existing:
            folder_id = createGDriveFolder(item, upparent_folder)
            print('New Folder', item, folder_id)
            new_parent = folder_id
        else:
            # presumably (True, folder_id) when found — TODO confirm
            new_parent = existing[1]

        one_dir = []
        for dirpath, dirnames, filenames in os.walk(item_base, topdown=True):

            # add all directories one level down to list to then loop
            # through next
            if dirpath.count(os.sep) - item_base.count(os.sep) == 1:
                one_dir.append(dirpath)
            # remove all the files that arent in the immediate directory
            # (pruning dirnames in-place stops topdown os.walk descending)
            if dirpath.count(os.sep) - item_base.count(os.sep) != 0:
                del dirnames[:]
                del filenames[:]
            # take just the remaining files and copy on over
            for f in filenames:
                if not fileInGDrive(f, new_parent):
                    source_md5 = md5_check.md5_file(os.path.join(dirpath, f))
                    up_id = writeToGDrive(f, os.path.join(dirpath, f),
                                          new_parent, source_md5)
                    # MD5 is stashed in the Drive file's description on
                    # upload; read it back to confirm a clean transfer.
                    up_md5 = SERVICE.files().get(
                        fileId=up_id, fields='description').execute()

                    if source_md5 != up_md5['description']:
                        print('ERROR IN UPLOAD ' + f)
                    else:
                        print('md5 uploaded correctly for ' + f)

        # re-loop to next level
        uploader(one_dir, new_parent)
# Example #5
### Regular folder md5 check (no untar needed)
# Collect every file under the source tree and the download tree.
files_start = []
for dirpath, dirnames, filenames in os.walk(dir_start):
    for f in filenames:
        files_start.append(os.path.join(dirpath, f))

dir_end = './download'
files_end = []
for dirpath, dirnames, filenames in os.walk(dir_end):
    for f in filenames:
        files_end.append(os.path.join(dirpath, f))

# Index downloaded files by basename so each source file is compared only
# against same-named candidates — the original scanned every (start, end)
# pair and re-hashed the source file once per matching candidate.
end_by_name = {}
for f_out in files_end:
    end_by_name.setdefault(os.path.basename(f_out), []).append(f_out)

for f_in in files_start:
    base = os.path.basename(f_in)
    candidates = end_by_name.get(base, [])
    matched = False
    if candidates:
        # Hash the source file once, not once per candidate.
        f_in_md5 = md5_check.md5_file(f_in)
        matched = any(md5_check.md5_file(f_out) == f_in_md5
                      for f_out in candidates)
    if matched:
        print('download md5 ok for ' + base)
    else:
        print('ERROR! download md5 bad for ' + base)

print('Download step completed')

#### untar and md5 check for BCL downloads
# Paths for the BCL untar/verify step (the loop that uses these continues
# beyond this view).
bcl_out = './download/Raw_bcl'  # downloaded raw-BCL archives live here
tar_out = './bcl_folders/down'  # extraction destination for the archives
sub_tar_out = []  # NOTE(review): presumably accumulates extracted sub-paths — confirm downstream