def get_files_from_drive(drive_id: str, gdrive_service: Resource) -> list:
    """Pull all files that live directly under the given Drive folder.

    Pages through the `files.list` endpoint until no `nextPageToken` is
    returned; on any API failure the error is logged and the files collected
    so far are returned (best-effort).
    """
    collected = []
    next_page = None
    while True:
        try:
            request_params = {'q': f'"{drive_id}" in parents'}
            if next_page:
                request_params['pageToken'] = next_page
            response = gdrive_service.files().list(**request_params).execute()
            collected.extend(response['files'])
            next_page = response.get('nextPageToken')
        except Exception as ex:
            # Best-effort: log and stop paging rather than propagating.
            logging.error('An error occurred: %s', ex)
            break
        if not next_page:
            break
    return collected
def save_version(gds: Resource, file_data: BytesIO, mimetype: str, file_id: str, filename: str) -> dict:
    """Uploads a new version of an existing file to Google Drive.

    Args:
        gds (Resource): google drive services.
        file_data (BytesIO): file content as a buffer.
        mimetype (str): MIME type of the file.
        file_id (str): Google Drive's id of the existing file.
        filename (str): filename of the file.

    Returns:
        dict: metadata of the uploaded file.
    """
    log("Saving new version of %s", filename)
    upload = MediaIoBaseUpload(file_data, mimetype=mimetype)
    # keepRevisionForever=False lets Drive prune old revisions automatically.
    request = gds.files().update(
        fileId=file_id,
        keepRevisionForever=False,
        media_body=upload,
    )
    return request.execute()
def walk(origin_id: str, service: Resource, orig_path: str,
         item_details: Dict[str, str], out_stream, push_updates: bool,
         drive_path='~'):
    """
    Traverses directories in Google Drive and replicates the file/folder
    structure similar to Google Drive.

    This method will create an equivalent `.strm` file for every media file
    found in a particular directory. The result will be the complete
    replication of the entire directory structure, with an strm file being
    generated for every media file, pointing to the original file on the
    internet.

    Parameters
    -----------
    origin_id: String containing the id of the root/source directory. \n
    service: Instance of `Resource` object used to interact with Google Drive
        API. \n
    orig_path: Path to the directory in which strm files are to be placed once
        generated. This directory will be made by THIS method internally. \n
    item_details: Dictionary containing details of the directory being scanned
        from Drive. \n
    out_stream: Dictionary to which the output is to be written to
        (during updates). \n
    push_updates: Boolean indicating if updates are to be pushed to the screen
        or not. \n
    drive_path: Drive-side path of the directory, used only for the progress
        display. \n
    """
    global files_scanned, directories_scanned, bytes_scanned, files_skipped

    if not isinstance(origin_id, str) or not isinstance(service, Resource):
        raise TypeError('Unexpected argument type')

    # Local directory mirroring the Drive folder currently being scanned.
    cur_path = join(orig_path, item_details['name'])
    mkdir(cur_path)

    page_token = None
    if push_updates:
        out_stream[0] = f'Scanning Directory: {shrink_path(drive_path)}/'
        out_stream[1] = '\n'  # Blank line

    while True:
        result = service.files().list(
            # Getting the maximum number of items available in a single API
            # call to reduce the calls required.
            pageSize=1000,
            pageToken=page_token,
            # The fields that are to be included in the response.
            fields='files(name, id, mimeType, teamDriveId, size)',
            # Getting items from all drives; this allows scanning
            # team-drives too.
            supportsAllDrives=True,
            includeItemsFromAllDrives=True,
            # Skipping trashed files and directories.
            q=f"'{origin_id}' in parents and trashed=false"
        ).execute()

        for item in result.get('files', []):
            if item['mimeType'] == 'application/vnd.google-apps.folder':
                # If the current object is a folder, incrementing the folder
                # count and recursively calling the same method over the new
                # directory encountered.
                directories_scanned += 1
                walk(
                    origin_id=item['id'],
                    service=service,
                    orig_path=cur_path,
                    item_details=item,
                    out_stream=out_stream,
                    push_updates=push_updates,
                    drive_path=f'{join(drive_path, item["name"])}'
                )
            elif 'video' in item['mimeType'] or match(r'.*\.(mkv|mp4)$', item['name']):
                # Scanning the file, creating an equivalent strm file if the
                # file is a media file. Since the mime-type of files in drive
                # can be modified externally, scanning a file as a media file
                # even if it has an extension of `.mp4` or `.mkv`.
                # Creating string to be placed inside the strm file to ensure
                # that the file can be parsed by the drive add-on.
                file_content = f'plugin://plugin.googledrive/?action=play&item_id={item["id"]}'
                if 'teamDriveId' in item:
                    # Adding this part only for items present in a teamdrive.
                    file_content += f'&item_driveid={item["teamDriveId"]}' \
                                    f'&driveid={item["teamDriveId"]}'
                file_content += '&content_type=video'

                with open(join(cur_path, item['name'] + '.strm'), 'w') as f:
                    f.write(file_content)

                # Updating the counter for files scanned as well as bytes
                # scanned. `size` can be absent for Drive-native formats, so
                # default to 0 instead of raising KeyError.
                files_scanned += 1
                bytes_scanned += int(item.get('size', 0))
            else:
                # Skipping the file if the file is not a video file. Updating
                # counter to increment number of files that have been skipped.
                files_skipped += 1

        if push_updates:
            # Updating counter on the screen if updates are to be pushed.
            update(
                files=files_scanned,
                directories=directories_scanned,
                skipped=files_skipped,
                size=bytes_scanned,
                out_stream=out_stream
            )

        # BUG FIX: the page cursor was never advanced, so any folder with more
        # than one page of results was re-listed forever. Consume the token
        # before the next iteration and stop once the listing is exhausted.
        page_token = result.get('nextPageToken')
        if not page_token:
            break
def __init__(self, drive_service: Resource, file_id: str):
    # Export the whole spreadsheet as an Excel workbook and keep its raw
    # bytes in an in-memory buffer for later parsing.
    export_request = drive_service.files().export(
        fileId=file_id, mimeType=GoogleSheet.SHEET_MIMETYPE)
    raw_bytes = export_request.execute(num_retries=NUM_RETRIES)
    self.sheet_io = io.BytesIO(raw_bytes)
def _get_folder_in_parent(drive: gad.Resource, path: str) -> Tuple[str, str]:
    """
    Retrieve folder ID from given name and parent folder name.
    If not existing, it is created.

    Parameters:
        drive (gad.Resource): Service with which interacting with Google
            Drive.
        path (str): path = '{prefix}/{exchange}/{data_type}/{pair}/
                            {exchange}-{data_type}-{pair}-{int(timestamp)}.parquet'
            String from which is retrieved `prefix` (parent folder) and name
            of child folder '{exchange}-{data_type}-{pair}'.

    Returns:
        folder_id, folder_name (Tuple[str, str]): Id of child folder
            '{exchange}-{data_type}-{pair}'. Create it if not existing.

    Raises:
        InconsistentStorage: if the prefix is a multi-level path, if the
            parent folder is missing or ambiguous, or if several child folders
            share the target name.
    """
    # Retrieve parent folder (prefix), and child folder.
    path_struct = path.split('/')
    folder_name = '-'.join(path_struct[1:4])
    if len(path_struct) > 5:
        # If larger than 5, it means prefix is more than a single folder.
        # This case is not supported.
        raise InconsistentStorage("Prefix {!s} appears to be a path. Only a single folder name is accepted.".format(folder_name))
    parent_name = path_struct[0]

    # Retrieve candidates for child and parent folders in a single API call.
    # NOTE(review): folder names are interpolated into the query string; names
    # containing a quote would break the query — confirm inputs are sanitized.
    res = drive.files().list(
        q=("(name = '" + parent_name + "' or name = '" + folder_name
           + "') and mimeType = 'application/vnd.google-apps.folder'"
           + " and trashed = false"),
        pageSize=20,
        fields='files(id, name, parents)').execute()
    folders = res.get('files', [])

    # Manage parent folder.
    p_folders = [(folder['id'], folder['name']) for folder in folders
                 if folder['name'] == parent_name]
    if len(p_folders) > 1:
        # If 2 or more folders with the same name, throw an error. We do not
        # know which one is the right one to record data.
        raise InconsistentStorage("At least 2 parent folders identified with name {!s}. Please, make sure to provide a prefix corresponding to a unique folder name in your Google Drive space.".format(parent_name))
    elif not p_folders:
        # If parent folder is not found, ask the user to create one.
        raise InconsistentStorage("No existing folder found with name {!s}. Please, make sure to provide a prefix corresponding to an existing and accessible folder.".format(parent_name))
    else:
        p_folder_id = p_folders[0][0]

    # Manage child folder: only candidates whose parent is the prefix folder.
    c_folders = [(folder['id'], folder['name']) for folder in folders
                 if ((folder['name'] == folder_name)
                     and ('parents' in folder)
                     and (p_folder_id in folder['parents']))]
    if len(c_folders) > 1:
        # If 2 or more folders with the same name, throw an error. We do not
        # know which one is the right one to record data.
        raise InconsistentStorage("At least 2 folders identified with name {!s}. Please, clean content of parent folder.".format(folder_name))
    elif not c_folders:
        # If folder not found, create it.
        folder_metadata = {'name': folder_name,
                           'mimeType': 'application/vnd.google-apps.folder',
                           'parents': [p_folder_id]}
        folder = drive.files().create(body=folder_metadata, fields='id')\
                      .execute()
        return folder.get('id'), folder_name
    else:
        # Single child folder found.
        # BUG FIX: previously returned `folders[0]['id']`, but `folders` mixes
        # parent- and child-name matches in API order, so the PARENT folder's
        # id could be returned. Return the filtered child match instead.
        return c_folders[0][0], folder_name
def _upload_file(self, gdrive_service: Resource, file: str,
                 folder_ids: Dict[str, str]) -> Tuple[bool, int]:
    """Upload a file if it has changed

    Looks up the file by name inside its destination folder, compares MD5
    checksums, and only transfers data when the content differs (updating the
    existing file) or when no remote copy exists yet (creating one).

    Args:
        gdrive_service: Authenticated GDrive client
        file (str): Path to the file to be uploaded
        folder_ids (dict): Map of the workspace name to folder ids
    Returns:
        - (bool) Whether the file was updated
        - (int) Amount of data uploaded
    """
    file_path = Path(file)
    # Destination folder is keyed by the file's parent directory name.
    folder_id = folder_ids[file_path.parent.name]

    # Look for an existing copy of this file inside the destination folder.
    listing = gdrive_service.files().list(
        q=f"name = '{file_path.name}' and '{folder_id}' in parents and trashed = false",
        pageSize=2,
        fields='files/id,files/md5Checksum,files/size').execute()
    hits = listing.get('files', [])

    if len(hits) > 1:
        raise ValueError('>1 file with this name in the backup directory')

    if hits:
        remote = hits[0]
        file_id = remote.get('id')
        logger.info(f'Matched existing file {file_id} to {file}')

        # Hash the local file in chunks and compare against the remote copy.
        digest = md5()
        with open(file_path, 'rb') as fp:
            for chunk in iter(lambda: fp.read(4096), b''):
                digest.update(chunk)
        if digest.hexdigest() == remote.get('md5Checksum'):
            logger.info('MD5 checksum is unchanged. Skipping upload')
            return False, 0

        # Content differs: push a new revision of the existing file.
        media = MediaFileUpload(file, mimetype='application/ld+json')
        result = gdrive_service.files().update(
            fileId=file_id, body={'name': file_path.name},
            media_body=media, fields='id,size').execute()
    else:
        # No remote copy yet: create one inside the destination folder.
        media = MediaFileUpload(file, mimetype='application/ld+json')
        result = gdrive_service.files().create(
            body={'name': file_path.name, 'parents': [folder_id]},
            media_body=media, fields='id,size').execute()

    logger.info(f'Uploaded {file} to {result.get("id")}')
    return True, int(result.get('size'))