def get_files_from_drive(drive_id: str, gdrive_service: Resource) -> list:
    """
    Pull files from a specific google drive file
    """
    files = []
    page_token = None
    while True:
        try:
            param = {'q': f'"{drive_id}" in parents'}
            if page_token:
                param['pageToken'] = page_token
            gdrive_resp = gdrive_service.files().list(**param).execute()
            files += gdrive_resp['files']
            page_token = gdrive_resp.get('nextPageToken')
            if not page_token:
                break
        except Exception as ex:
            logging.error('An error occurred: %s', ex)
            break
    return files
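
For context, a minimal sketch of how such a helper is typically driven; `creds` and the folder id below are assumptions, not part of the original snippet:

from googleapiclient.discovery import build

# `creds` is assumed to be obtained beforehand via google-auth
# (service account or OAuth flow); the folder id is a placeholder.
drive_service = build('drive', 'v3', credentials=creds)
for item in get_files_from_drive('YOUR_FOLDER_ID', drive_service):
    print(item['id'], item['name'])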
Example #2
def save_version(gds: Resource, file_data: BytesIO, mimetype: str,
                 file_id: str, filename: str) -> dict:
    """Uploads a new version of an existing file to Google Drive.

    Args:
        gds (Resource): Google Drive service.
        file_data (BytesIO): file content as a buffer.
        mimetype (str): MIME type of the file.
        file_id (str): Google Drive id of the existing file.
        filename (str): name of the file.

    Returns:
        dict: metadata of the uploaded file.
    """

    log("Saving new version of %s", filename)
    media = MediaIoBaseUpload(file_data, mimetype=mimetype)
    response = (gds.files().update(fileId=file_id,
                                   keepRevisionForever=False,
                                   media_body=media).execute())

    return response
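
A hedged usage sketch; the file id is a placeholder and `gds` is an authenticated Drive v3 `Resource`. Note that `save_version` relies on `MediaIoBaseUpload` from `googleapiclient.http`:

import io

# Read a local file into memory and push it as a new revision.
with open('report.pdf', 'rb') as fh:
    buffer = io.BytesIO(fh.read())

# 'FILE_ID' is a hypothetical placeholder for the Drive id of the existing file.
meta = save_version(gds, buffer, 'application/pdf', 'FILE_ID', 'report.pdf')
print(meta.get('id'))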
Example #3
def walk(origin_id: str, service: Resource, orig_path: str, item_details: Dict[str, str],
         out_stream,
         push_updates: bool, drive_path='~'):
    """
        Traverses directories in Google Drive and replicates the file/folder structure similar to
        Google Drive.

        This method will create an equivalent `.strm` file for every media file found in a
        particular directory. The result will be the complete replication of entire directory
        structure with an strm file being generated for every media file, pointing to the original
        file on the internet.

        Parameters
        -----------
        origin_id: String containing the id of the root/source directory. \n
        service: Instance of `Resource` object used to interact with Google Drive API. \n
        orig_path: Path to the directory in which strm files are to be placed once generated. This
        directory will be made by THIS method internally. \n
        item_details: Dictionary containing details of the directory being scanned from Drive. \n
        out_stream: Dictionary to which the output is to be written to once (during updates). \n
        push_updates: Boolean indicating if updates are to be pushed to the screen or not. \n
    """

    global files_scanned, directories_scanned, bytes_scanned, files_skipped

    if not isinstance(origin_id, str) or not isinstance(service, Resource):
        raise TypeError('Unexpected argument type')

    # Updating the current path to be inside the path where this directory is to be created.
    cur_path = join(orig_path, item_details['name'])

    # Creating the root directory.
    mkdir(cur_path)

    page_token = None

    if push_updates:
        out_stream[0] = f'Scanning Directory: {shrink_path(drive_path)}/'
        out_stream[1] = '\n'  # Blank line

    while True:
        result = service.files().list(
            # Getting the maximum number of items available in a single API call
            # to reduce the calls required.
            pageSize=1000,
            pageToken=page_token,

            # The fields that are to be included in the response. `nextPageToken`
            # must be requested explicitly, otherwise pagination cannot continue.
            fields='nextPageToken, files(name, id, mimeType, teamDriveId, size)',

            # Getting items from all drives; this also allows scanning team drives.
            supportsAllDrives=True,
            includeItemsFromAllDrives=True,

            # Skipping trashed files and directories
            q=f"'{origin_id}' in parents and trashed=false"
        ).execute()

        for item in result['files']:
            if item['mimeType'] == 'application/vnd.google-apps.folder':
                # If the current object is a folder, incrementing the folder count and recursively
                # calling the same method over the new directory encountered.
                directories_scanned += 1

                walk(
                    origin_id=item['id'],
                    service=service,
                    orig_path=cur_path,
                    item_details=item,
                    out_stream=out_stream,
                    push_updates=push_updates,
                    drive_path=f'{join(drive_path, item["name"])}'
                )
            elif 'video' in item['mimeType'] or match(r'.*\.(mkv|mp4)$', item['name']):
                # Scanning the file and creating an equivalent strm file if it is a
                # media file. Since the MIME type of a file in Drive can be modified
                # externally, a file is also treated as media when its name ends in
                # `.mp4` or `.mkv`, regardless of its reported MIME type.

                # Creating string to be placed inside the strm file to ensure that the file can be
                # parsed by the drive add-on.
                file_content = f'plugin://plugin.googledrive/?action=play&item_id={item["id"]}'
                if 'teamDriveId' in item:
                    # Adding this part only for items present in a teamdrive.
                    file_content += f'&item_driveid={item["teamDriveId"]}' \
                                    f'&driveid={item["teamDriveId"]}'

                file_content += '&content_type=video'
                with open(join(cur_path, item['name'] + '.strm'), 'w+') as f:
                    f.write(file_content)

                # Updating the counter for files scanned as well as bytes scanned.
                files_scanned += 1
                bytes_scanned += int(item['size'])
            else:
                # Skipping the file if the file is not a video file. Updating counter to increment
                # number of files that have been skipped.
                files_skipped += 1

            if push_updates:
                # Updating counter on the screen if updates are to be pushed to the screen.
                update(
                    files=files_scanned,
                    directories=directories_scanned,
                    skipped=files_skipped,
                    size=bytes_scanned,
                    out_stream=out_stream
                )

        # Moving to the next page of results, stopping once the last page is reached.
        page_token = result.get('nextPageToken')
        if not page_token:
            break
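
Starting a scan needs the root folder's metadata so it can be passed as `item_details`; a minimal sketch, assuming the module-level counters and helpers referenced above are in place (the folder id and local path are placeholders):

# Fetch the name and id of the root folder; `supportsAllDrives` lets a
# shared/team drive folder be used as the root as well.
root = service.files().get(fileId='ROOT_FOLDER_ID',
                           fields='id, name',
                           supportsAllDrives=True).execute()

walk(origin_id=root['id'], service=service, orig_path='/media/library',
     item_details=root, out_stream={}, push_updates=False)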
Example #4
    def __init__(self, drive_service: Resource, file_id: str):
        # Download the entire spreadsheet as an Excel file and store it in a buffer.
        self.sheet_io = io.BytesIO(drive_service.files().export(
            fileId=file_id, mimeType=GoogleSheet.SHEET_MIMETYPE).execute(
                num_retries=NUM_RETRIES))
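
`GoogleSheet.SHEET_MIMETYPE` and `NUM_RETRIES` are defined elsewhere in the source. Assuming the export target is an `.xlsx` workbook (MIME type `application/vnd.openxmlformats-officedocument.spreadsheetml.sheet`), the resulting buffer can be handed straight to a reader such as openpyxl:

from openpyxl import load_workbook

# `sheet` is a GoogleSheet instance, e.g. GoogleSheet(drive_service, 'FILE_ID');
# the buffer behaves like an opened .xlsx file.
workbook = load_workbook(sheet.sheet_io)
print(workbook.sheetnames)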
Example #5
def _get_folder_in_parent(drive: gad.Resource, path: str) -> Tuple[str, str]:
    """
    Retrieve folder ID from given name and parent folder name.
    If not existing, it is created.

    Parameters:
        drive (gad.Resource):
            Service with which interacting with Google Drive.
        path (str):
            path = '{prefix}/{exchange}/{data_type}/{pair}/
                        {exchange}-{data_type}-{pair}-{int(timestamp)}.parquet'
            String from which is retrieved `prefix` (parent folder) and name of
            child folder '{exchange}-{data_type}-{pair}'.

    Returns:
        folder_id, folder_name (Tuple[str, str]):
            Id of child folder '{exchange}-{data_type}-{pair}'. Create it if
            not existing.

    """

    # Retrieve parent folder (prefix) and child folder name from the path.
    path_struct = path.split('/')
    folder_name = '-'.join(path_struct[1:4])
    if len(path_struct) > 5:
        # More than 5 components means the prefix is itself a path of nested
        # folders. This case is not supported.
        raise InconsistentStorage("Prefix {!s} appears to be a path. Only a \
single folder name is accepted.".format('/'.join(path_struct[:-4])))

    parent_name = path_struct[0]
    # Retrieve candidates for child and parent folders.
    res = drive.files().list(q="(name = '" + parent_name + "' or name = '"
                                           + folder_name + "') and mimeType = 'application/vnd.google-apps.folder' and trashed = false",
                             pageSize=20,
                             fields='files(id, name, parents)').execute()
    folders = res.get('files', [])

    # Manage parent folder.
    p_folders = [(folder['id'], folder['name']) for folder in folders
                 if folder['name'] == parent_name]
    if len(p_folders) > 1:
        # If 2 or more folders share the parent name, throw an error: we cannot
        # know which one is the right one to record data into.
        raise InconsistentStorage("At least 2 parent folders identified with \
name {!s}. Please, make sure to provide a prefix corresponding to a unique \
folder name in your Google Drive space.".format(parent_name))
    elif not p_folders:
        # If parent folder is not found, ask the user to create one.
        raise InconsistentStorage("No existing folder found with name {!s}. \
Please, make sure to provide a prefix corresponding to an existing and \
accessible folder.".format(parent_name))
    else:
        p_folder_id = p_folders[0][0]

    # Manage child folder.
    c_folders = [(folder['id'], folder['name']) for folder in folders
                 if ((folder['name'] == folder_name) and ('parents' in folder)
                     and (p_folder_id in folder['parents']))]
    if len(c_folders) > 1:
        # If 2 or more folders share the child name, throw an error: we cannot
        # know which one is the right one to record data into.
        raise InconsistentStorage("At least 2 folders identified with name \
{!s}. Please, clean content of parent folder.".format(folder_name))
    elif not c_folders:
        # If folder not found, create it.
        folder_metadata = {'name': folder_name,
                           'mimeType': 'application/vnd.google-apps.folder',
                           'parents': [p_folder_id]}
        folder = drive.files().create(body=folder_metadata, fields='id')\
                              .execute()

        return folder.get('id'), folder_name
    else:
        # Single matching child folder found. Note: `c_folders`, not `folders`,
        # must be indexed here, since `folders` may list the parent first.
        return c_folders[0][0], folder_name
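
A usage sketch with a path following the documented layout; the exchange, pair and timestamp below are illustrative, and `drive` is an authenticated `Resource`:

# '{prefix}/{exchange}/{data_type}/{pair}/{exchange}-{data_type}-{pair}-{timestamp}.parquet'
path = 'backups/binance/trade/btcusd/binance-trade-btcusd-1577836800.parquet'
folder_id, folder_name = _get_folder_in_parent(drive, path)
# folder_name is 'binance-trade-btcusd'; folder_id points at it inside 'backups'.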
Example #6
    def _upload_file(self, gdrive_service: Resource, file: str,
                     folder_ids: Dict[str, str]) -> Tuple[bool, int]:
        """Upload a file if it has changed

        Args:
            gdrive_service: Authenticated GDrive client
            file (str): Path to the file to be uploaded
            folder_ids (dict): Map of the workspace name to folder ids
        Returns:
            - (bool) Whether the file was updated
            - (int) Amount of data uploaded
        """
        # Get the appropriate folder
        file_path = Path(file)
        folder_name = file_path.parent.name
        folder_id = folder_ids[folder_name]

        # See if the file already exists in the target folder
        result = gdrive_service.files().list(
            q=f"name = '{file_path.name}' and '{folder_id}' in parents and trashed = false",
            pageSize=2,
            fields='files/id,files/md5Checksum,files/size').execute()
        hits = result.get('files', [])

        # Determine whether to upload the file
        if len(hits) > 1:
            raise ValueError('>1 file with this name in the backup directory')
        elif len(hits) == 1:
            # Exactly one match: update the existing copy if its content changed
            file_id = hits[0].get('id')
            logger.info(f'Matched existing file {file_id} to {file}')

            # Check if the file's md5 hash has changed
            my_hash = md5()
            with open(file_path, 'rb') as fp:
                buff = fp.read(4096)
                while len(buff) > 0:
                    my_hash.update(buff)
                    buff = fp.read(4096)
            if my_hash.hexdigest() == hits[0].get('md5Checksum'):
                logger.info('MD5 checksum is unchanged. Skipping upload')
                return False, 0

            # Update the file
            file_metadata = {'name': file_path.name}
            media = MediaFileUpload(file, mimetype='application/ld+json')
            result = gdrive_service.files().update(fileId=file_id,
                                                   body=file_metadata,
                                                   media_body=media,
                                                   fields='id,size').execute()
            logger.info(f'Uploaded {file} to {result.get("id")}')
            return True, int(result.get('size'))
        else:
            # Upload the file
            file_metadata = {'name': file_path.name, 'parents': [folder_id]}
            media = MediaFileUpload(file, mimetype='application/ld+json')
            result = gdrive_service.files().create(body=file_metadata,
                                                   media_body=media,
                                                   fields='id,size').execute()
            logger.info(f'Uploaded {file} to {result.get("id")}')
            return True, int(result.get('size'))
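
Within the owning class, a driver loop for a whole backup tree might look like the following sketch; the directory layout and the `folder_ids` mapping are assumptions, not part of the original:

        # Upload every JSON-LD file in the (hypothetical) backup tree and tally
        # how many bytes actually moved.
        total_uploaded = 0
        for path in Path('backup_root').glob('*/*.json'):
            changed, nbytes = self._upload_file(gdrive_service, str(path), folder_ids)
            if changed:
                total_uploaded += nbytes
        logger.info(f'Uploaded {total_uploaded} bytes in total')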