Ejemplo n.º 1
0
def check_google():
    """Query the reports folder on Google Drive and return a DataFrame of
    files created after the last-known update that are not yet present in
    REPORTS_PATH locally.

    Returns:
        pandas.DataFrame with columns ['createdDate', 'id', 'title', 'date']
        restricted to new, not-yet-downloaded report files.
    """
    from pydrive2.auth import GoogleAuth
    from pydrive2.drive import GoogleDrive
    from pydrive2.auth import ServiceAccountCredentials

    # Service-account auth: no interactive flow, credentials come from a
    # JSON key file stored under DATA_PATH.
    gauth = GoogleAuth()
    scope = ['https://www.googleapis.com/auth/drive']
    cred_path = os.path.join(DATA_PATH, 'credentials.json')
    gauth.credentials = ServiceAccountCredentials.from_json_keyfile_name(
        cred_path, scope)
    drive = GoogleDrive(gauth)

    # List every non-trashed file inside the reports folder.
    file_id = '1603ahBNdt1SnSaYYBE-G8SA6qgRTQ6fF'
    file_list = drive.ListFile({
        'q':
        "'%s' in parents and trashed=false" % file_id
    }).GetList()

    df = pandas.DataFrame(file_list)
    dfclean = df[['createdDate', 'id', 'title']].copy()
    # BUG FIX: createdDate values are full ISO-8601 timestamps (e.g.
    # '2020-09-11T01:53:29.639Z'); forcing format='%Y-%m-%d' with
    # errors='coerce' turned every value into NaT, so the "newer than
    # lastupdate" filter always produced an empty frame. Let pandas parse
    # the ISO format instead.
    dfclean['date'] = pandas.to_datetime(dfclean['createdDate'],
                                         errors='coerce')
    lastupdate = dfclean.loc[dfclean['createdDate'] ==
                             '2020-09-11T01:53:29.639Z'].iloc[0]['date']
    dfnew = dfclean.loc[dfclean['date'] > lastupdate]

    all_files = os.listdir(REPORTS_PATH)
    # BUG FIX: the original comprehension was inverted — it kept local files
    # that were absent from the Drive listing, so the subsequent isin()
    # always yielded an empty frame. Keep Drive titles NOT yet present
    # locally (mirrors the sibling check_google() that feeds data/tables/).
    new_files = [
        item for item in dfnew['title'].unique().tolist()
        if item not in all_files
    ]
    reportdf = dfnew.loc[dfnew['title'].isin(new_files)]
    return reportdf
Ejemplo n.º 2
0
def check_google():
    """Poll the Drive tables folder and return the rows for files created
    after the hard-coded last-update timestamp that are missing from the
    local data/tables/ directory."""
    from pydrive2.auth import GoogleAuth
    from pydrive2.drive import GoogleDrive
    from pydrive2.auth import ServiceAccountCredentials

    # Non-interactive service-account authentication.
    auth = GoogleAuth()
    auth.credentials = ServiceAccountCredentials.from_json_keyfile_name(
        'credentials.json', ['https://www.googleapis.com/auth/drive'])
    drive = GoogleDrive(auth)

    # Fetch every non-trashed file inside the tables folder.
    folder_id = '1vC8oXhfhogAh7olq9BvEPdwwvyeXsZkk'
    listing = drive.ListFile({
        'q':
        "'%s' in parents and trashed=false" % folder_id
    }).GetList()

    frame = pandas.DataFrame(listing)[['createdDate', 'id', 'title']].copy()
    frame['date'] = pandas.to_datetime(frame['createdDate'],
                                       format='%Y-%m-%d',
                                       errors='coerce')
    # Anchor on the known timestamp of the last processed file, then keep
    # only rows created after it.
    anchor_rows = frame.loc[frame['createdDate'] == '2020-09-28T22:28:33.989Z']
    cutoff = anchor_rows.iloc[0]['date']
    recent = frame.loc[frame['date'] > cutoff]

    # Keep only titles that are not already downloaded locally.
    local_titles = os.listdir('data/tables/')
    missing = [
        title for title in recent['title'].unique().tolist()
        if title not in local_titles
    ]
    tabledf = recent.loc[recent['title'].isin(missing)]
    return tabledf
Ejemplo n.º 3
0
    def search_folder(self, folderid: str):
        """Return the list of non-trashed files inside the Drive folder
        identified by *folderid*."""
        query = {'q': f"'{folderid}' in parents and trashed=false"}
        session = GoogleDrive(self.gauth)
        return session.ListFile(query).GetList()
Ejemplo n.º 4
0
    def _parallel_uploader(self,
                           num_of_uploads,
                           num_of_workers,
                           use_per_thread_http=False):
        """Upload *num_of_uploads* files concurrently under one random remote
        title, assert they all arrive, then delete them.

        Parameters:
            num_of_uploads: how many uploads to submit to the pool.
            num_of_workers: ThreadPoolExecutor size.
            use_per_thread_http: forwarded to each UploadWorker so threads
                can use their own HTTP objects.
        """
        drive = GoogleDrive(self.ga)
        thread_pool = ThreadPoolExecutor(max_workers=num_of_workers)

        # Create list of gdrive_files. All uploads share a single random
        # remote title so one title query can find them all afterwards;
        # local content alternates between the two fixture files.
        upload_files = []
        remote_name = test_util.CreateRandomFileName()
        for i in range(num_of_uploads):
            file_name = self.first_file if i % 2 == 0 else self.second_file
            up_file = drive.CreateFile()
            up_file["title"] = remote_name
            up_file.SetContentFile(file_name)
            upload_files.append(up_file)

        # Ensure there are no files with the random file name.
        files = pydrive_retry(lambda: drive.ListFile(
            param={
                "q": "title = '%s' and trashed = false" % remote_name
            }).GetList())
        self.assertTrue(len(files) == 0)

        # Submit upload jobs to ThreadPoolExecutor.
        futures = []
        for i in range(num_of_uploads):
            upload_worker = self.UploadWorker(upload_files[i],
                                              use_per_thread_http)
            futures.append(thread_pool.submit(upload_worker.run))

        # Ensure that all threads a) return, and b) encountered no exceptions.
        for future in as_completed(futures):
            self.assertIsNone(future.exception())
        thread_pool.shutdown()

        # Ensure all files were uploaded.
        # NOTE(review): this asserts against the class constant
        # FILE_UPLOAD_COUNT rather than num_of_uploads — presumably callers
        # always pass matching values; confirm.
        files = pydrive_retry(lambda: drive.ListFile(
            param={
                "q": "title = '%s' and trashed = false" % remote_name
            }).GetList())
        self.assertTrue(len(files) == self.FILE_UPLOAD_COUNT)

        # Remove uploaded files.
        self.DeleteUploadedFiles(drive, [fi["id"] for fi in upload_files])
Ejemplo n.º 5
0
    def _parallel_uploader(self, num_of_uploads, num_of_workers):
        """Upload *num_of_uploads* temp files concurrently under one random
        remote title and assert they all arrive.

        Parameters:
            num_of_uploads: how many uploads to submit to the pool.
            num_of_workers: ThreadPoolExecutor size.

        :returns: list[str] of file IDs
        """
        drive = GoogleDrive(self.ga)
        thread_pool = ThreadPoolExecutor(max_workers=num_of_workers)
        first_file = self.getTempFile("first_file", "some string")
        second_file = self.getTempFile("second_file", "another string")

        # Create list of gdrive_files. All uploads share one random remote
        # title so a single title query finds them all; local content
        # alternates between the two temp files.
        upload_files = []
        remote_name = test_util.CreateRandomFileName()
        for i in range(num_of_uploads):
            file_name = first_file if i % 2 == 0 else second_file
            up_file = drive.CreateFile()
            up_file["title"] = remote_name
            up_file.SetContentFile(file_name)
            upload_files.append(up_file)

        # Ensure there are no files with the random file name.
        files = pydrive_retry(lambda: drive.ListFile(
            param={
                "q": "title = '%s' and trashed = false" % remote_name
            }).GetList())
        self.assertTrue(len(files) == 0)

        # Submit upload jobs to ThreadPoolExecutor. Each Upload call is
        # wrapped in pydrive_retry to absorb transient API errors.
        futures = []
        for up_file in upload_files:
            futures.append(thread_pool.submit(pydrive_retry, up_file.Upload))

        # Ensure that all threads a) return, and b) encountered no exceptions.
        for future in as_completed(futures):
            self.assertIsNone(future.exception())
        thread_pool.shutdown()

        # Ensure all files were uploaded.
        # NOTE(review): asserts against the class constant FILE_UPLOAD_COUNT
        # rather than num_of_uploads — presumably callers pass matching
        # values; confirm.
        files = pydrive_retry(lambda: drive.ListFile(
            param={
                "q": "title = '%s' and trashed = false" % remote_name
            }).GetList())
        self.assertTrue(len(files) == self.FILE_UPLOAD_COUNT)

        return [fi["id"] for fi in upload_files]
Ejemplo n.º 6
0
class DVCSetup(object):
    """Helpers to initialise DVC tracking for a model repository backed by
    the shared Isaura Google Drive folder."""

    def __init__(self, local_repo_path, model_id):
        """Authenticate against Google Drive interactively and remember the
        local repo path and model identifier."""
        self.repo_path = local_repo_path
        self.model_id = model_id
        # NOTE(review): set_secrets_file() apparently returns the GoogleAuth
        # class after pointing it at the secrets file; the local name shadows
        # any imported GoogleAuth — confirm.
        GoogleAuth = set_secrets_file()
        gauth = GoogleAuth()
        # Interactive browser-based OAuth flow.
        gauth.LocalWebserverAuth()
        self.drive = GoogleDrive(gauth)

    def gdrive_setup(self):
        """Create a Drive folder named after the model inside ISAURA_GDRIVE."""
        folder = self.drive.CreateFile({
            "title":
            self.model_id,
            "parents": [{
                "id": ISAURA_GDRIVE
            }],
            "mimeType":
            "application/vnd.google-apps.folder",
        })
        folder.Upload()

    def gdrive_folder_id(self):
        """Return the ID of the model's folder inside the team drive, or
        None (implicitly) if no folder with that title exists."""
        fileList = self.drive.ListFile({
            "q": "'" + ISAURA_GDRIVE + "' in parents and trashed=false",
            "corpora": "teamDrive",
            "teamDriveId": ISAURA_TEAM_GDRIVE,
            "includeTeamDriveItems": True,
            "supportsTeamDrives": True,
        }).GetList()

        for file in fileList:
            if file["title"] == self.model_id:
                return str(file["id"])

    def set_dvc_gdrive(self):
        """Track data.h5 with DVC and push it to the model's gdrive remote."""
        # NOTE(review): repo_path and the folder id are interpolated straight
        # into shell command strings; paths containing spaces or shell
        # metacharacters will break (or worse) — consider argument lists.
        terminal.run_command("dvc --cd {0} add data.h5".format(self.repo_path))
        terminal.run_command("dvc --cd " + self.repo_path +
                             " remote add -d public_repo gdrive://" +
                             self.gdrive_folder_id())
        cmd = "dvc --cd " + self.repo_path + " push"
        terminal.run_command(cmd, quiet=False)

    def git_add_and_commit(self, message="Set to public data repo"):
        """Stage the DVC pointer file and commit it, restoring the caller's
        working directory afterwards."""
        cwd = os.getcwd()
        os.chdir(self.repo_path)
        terminal.run_command(
            "git add data.h5.dvc"
        )  # TODO: Be more specific in the files/folders to be added
        terminal.run_command("git commit -m '{0}'".format(message))
        os.chdir(cwd)
Ejemplo n.º 7
0
def _create_or_find_folder(folder_path: List[str], drive: GoogleDrive) -> str:
    """Walk *folder_path* down from the Drive root, creating any missing
    folder along the way, and return the ID of the final folder.

    Raises ValueError when a path component matches more than one folder.
    """
    current = 'root'
    for title in folder_path:
        matches = drive.ListFile({
            'q': f"{MIME}='{FOLDER_TYPE}' AND title='{title}' and trashed=false AND '{current}' in parents"
        }).GetList()
        if len(matches) > 1:
            # Drive titles are not unique; an ambiguous path is an error.
            raise ValueError("multiple folders with the same name")
        if matches:
            current = matches[0]['id']
        else:
            # No such folder yet — create it under the current parent.
            created = drive.CreateFile(
                {'title': title, MIME: FOLDER_TYPE, 'parents': [{"id": current}]})
            created.Upload()
            current = created['id']
    return current
Ejemplo n.º 8
0
class Google_Drive(object):
    """Thin convenience wrapper around a GoogleDrive session."""

    def __init__(self, auth):
        self.drive = GoogleDrive(auth)

    # create file
    def create(self, user_title):
        """Create an empty Google Doc titled *user_title* and return its
        shareable link."""
        doc = self.drive.CreateFile({
            'mimeType': 'application/vnd.google-apps.document',
            'title': user_title,
        })
        print(doc)
        doc.Upload()
        print(doc)
        return doc['alternateLink']

    def trash_files(self, file_title):
        """Move every non-trashed file whose title equals *file_title* to
        the trash."""
        # find the file with this name
        for candidate in self.drive.ListFile({'q': "trashed = false"}).GetList():
            if candidate['title'] == file_title:
                candidate.Trash()
Ejemplo n.º 9
0
 def test_03_Files_List_GetList_Iterate(self):
     """Page through ListFile results two at a time and check that every
     pre-created file in self.file_list eventually appears."""
     drive = GoogleDrive(self.ga)
     flist = drive.ListFile({
         'q': "title = '%s' and trashed = false" % self.title,
         'maxResults': 2
     })
     files = []
     # GetList() returns at most maxResults entries per call and raises
     # StopIteration once the listing is exhausted.
     while True:
         try:
             x = pydrive_retry(flist.GetList)
             self.assertTrue(len(x) <= 2)
             files.extend(x)
         except StopIteration:
             break
     # Every expected file must appear somewhere in the paged results.
     for file1 in self.file_list:
         found = False
         for file2 in files:
             if file1['id'] == file2['id']:
                 found = True
         self.assertEqual(found, True)
Ejemplo n.º 10
0
class RemoteDriveInterface:
    """A class to encapsulate all interactions with the Google Drive API"""

    def __init__(self) -> None:
        """Authenticate, cache the live and trashed file listings, and locate
        the drive root from the files' parent references."""

        # Create and authenticate the user
        self.auth = GoogleAuth(settings_file='config/settings.yaml')

        # Then create a new google drive instance and cache all of the files in the drive
        self.drive = GoogleDrive(self.auth)
        self.files = {file['id']: file for file in self.drive.ListFile({'q': "trashed=false"}).GetList()}
        self.trash = {file['id']: file for file in self.drive.ListFile({'q': "trashed=true"}).GetList()}

        # Finally we need to find the root of this google drive so we first need to get all parent references - root is
        # not considered a normal file
        all_parents = [parent for file in self.files.values() for parent in file['parents']]
        self.drive_root = [parent for parent in all_parents if parent['isRoot']].pop()

    def update_file_manifest(self) -> None:
        """Updates the local file cache with all remote files. Just queries the API to get the files listings"""

        self.files = {file['id']: file for file in self.drive.ListFile({'q': "trashed=false"}).GetList()}
        self.trash = {file['id']: file for file in self.drive.ListFile({'q': "trashed=true"}).GetList()}

    @staticmethod
    def resolve_mnemonic_conflict(matching_filenames: List[GoogleDriveFile]) -> GoogleDriveFile:
        """
        A function to ask the user to resolve any mnemonic conflicts that can arise because of how Google Drive
        works. Since Google Drive has display names that are not unique and since we want to use traditional file
        pathing as much as possible it is possible that a filename may correspond to many different files. In that case
        we need to ask the user which one they meant to reference with their query

        Parameters:
            matching_filenames (List[GoogleDriveFile]): A list of all google drive files matched the query by the user

        Returns:
            file_ids[response] (GoogleDriveFile): The GoogleDriveFile whose ID was entered by the user when prompted
        """

        # Cache a dictionary linking all file ids to their corresponding file objects
        file_ids = {file['id']: file for file in matching_filenames}

        # Loop until the user pastes a valid file ID.
        while 1:

            # Let the user know what is going wrong before anything else
            print_formatted_text(ANSI("\x1b[31mThere are multiple files with the same filename given!\n"))

            # Until the user provides the info we want keep printing the matching files
            for file in matching_filenames:
                pretty_date = file['modifiedDate'][:file['modifiedDate'].index('.')].replace('T', ' ')
                print_formatted_text(ANSI(f"\x1b[36mDisplay Name: \x1b[37m{file['title']} \x1b[36mLast Modified: "
                                          f"\x1b[37m{pretty_date} \x1b[36mFile ID: \x1b[37m{file['id']}"))

            # Newline for terminal readability and prompt the user to resolve the conflict
            print_formatted_text("")
            response = prompt(ANSI('\x1b[31mPlease copy/paste the ID of the file to which you were referring: '))

            # If the user's response is a valid key then return the associated file
            if response in file_ids.keys():
                return file_ids[response]

    @staticmethod
    def resolve_file_conversion(remote_file: GoogleDriveFile) -> str:
        """
        A function to take in a remote file and return to the caller a string that represents the file extension to use
        for this file. This exists to let users who may not know what conversion are available see all options that are
        available to them given a document type so they can download the file as the one best suited to their needs.

        Parameters:
            remote_file (GoogleDriveFile): The remote file who we are attempting to get the conversion for

        Returns:
            conversion_opts[user_input] (str): The file extension we are converting this file to
        """

        # Loop until the user picks a valid option number.
        while 1:

            # Print the helpful prompt on what the user is choosing and cache the supported conversions list for this
            # file
            print_formatted_text(ANSI(f"\x1b[36mWhat file type would you like to convert \"{remote_file['title']}\" to?"))
            conversion_opts = [ext for ext, link in SUPPORTED_FILE_TYPES.items() if link in remote_file['exportLinks']]

            # Print out all of the possible conversion's for this document and their associated number
            for choice, conversion in enumerate(conversion_opts):
                print_formatted_text(f"[{choice + 1}]: {conversion}")

            try:
                # Prompt the user for their choice of the file types printed out above
                user_input = int(prompt(ANSI(f'\x1b[36mChoose [1-{len(conversion_opts)}]: '))) - 1

                # If the user input a valid index then return the conversion extension they desire
                if 0 <= user_input < len(conversion_opts):
                    return conversion_opts[user_input]

            # If the user input a non integer cast-able value then inform them to use the numbers
            except ValueError:
                print_formatted_text(ANSI('\x1b[31mPlease input the integer that corresponds to your desired file type'))

    def validate_remote_path(self, file: GoogleDriveFile, remote_path: str, search_trash=False) -> bool:
        """
        Given a valid remote GoogleDriveFile and a semantic file path validate that using this path we could access
        this same file and if we can then return true however if this file path cannot lead us to the given file file
        then return false

        Parameters:
            file (GoogleDriveFile): The GoogleDriveFile we are starting from to validate the path
            remote_path (str): The absolute path to the file that we are trying to validate
            search_trash (bool): Lets us know if we want to search for trashed files when validating this path

        Returns:
            A boolean to let us know if the given remote path is valid or not
        """

        # Get all the files to check against, the parent ids of the file, and create a new path
        files_to_search = list(self.files.values()) + list(self.trash.values()) if search_trash else self.files.values()
        parent_ids = [parent['id'] for parent in file['parents']]
        to_validate = Path(remote_path)

        # If the title of the file is not the same as the last part of the given path then it is trivially false
        if file['title'] != to_validate.name:
            return False

        # If the file name matches the file path and the root node is a parent then it is trivially true
        elif self.drive_root['id'] in parent_ids:
            return True

        # We need to investigate further, look at all files and see if their id matches a parent and attempt to
        # validate them too if non validate correctly then return false
        # (recurses upward one path component at a time via to_validate.parent)
        for check in files_to_search:
            if check['id'] in parent_ids and self.validate_remote_path(check, to_validate.parent, search_trash=search_trash):
                return True

        return False

    def get_remote_file(self, filename: str, trashed=False) -> GoogleDriveFile:
        """
        Using a given file path search the Google Drive instance for any files matching that name and file path. If
        there are many instances of a files that match the given file path and name then prompt the user to specify
        which file they were attempting to access as we cannot determine this for ourselves at this point.

        Parameters:
            filename (str): The absolute remote path of the file we are searching for
            trashed (bool): Lets us know if we want to search the trash for this file

        Returns:
            Either a GoogleDriveFile if one was found that matches or None if no file matched the query
        """

        # Convert the filename to a file path for easier manipulation and create a list of matching file names
        files_to_check = self.trash.values() if trashed else self.files.values()
        full_path, matching_files = Path(filename), list()

        # Make sure that the file path is not the root before continuing
        if not full_path.name:
            return self.drive_root

        # Look over all files and if their filename matches the given one then validate that their full paths match
        for file in files_to_check:
            if file['title'] == full_path.name and self.validate_remote_path(file, full_path, search_trash=trashed):
                matching_files.append(file)

        # If we match any files then we definitely found a file but may need to resolve a same name issue with user
        # (falls through to an implicit None when nothing matched)
        if num_matches := len(matching_files):
            return matching_files.pop() if num_matches == 1 else self.resolve_mnemonic_conflict(matching_files)
Ejemplo n.º 11
0
class PyDriveBackend(duplicity.backend.Backend):
    u"""Connect to remote store using PyDrive API"""
    def __init__(self, parsed_url):
        # Sets up authentication (service-account key, settings file, or
        # secrets+credentials files), resolves/creates the destination
        # folder hierarchy, and initialises the filename->id cache.
        duplicity.backend.Backend.__init__(self, parsed_url)
        try:
            import httplib2
            from apiclient.discovery import build
        except ImportError as e:
            raise BackendException(u"""\
PyDrive backend requires PyDrive and Google API client installation.
Please read the manpage for setup details.
Exception: %s""" % str(e))

        # Shared Drive ID specified as a query parameter in the backend URL.
        # Example: pydrive://developer.gserviceaccount.com/target-folder/?driveID=<SHARED DRIVE ID>
        self.api_params = {}
        self.shared_drive_id = None
        if u'driveID' in parsed_url.query_args:
            self.shared_drive_id = parsed_url.query_args[u'driveID'][0]
            self.api_params = {
                u'corpora': u'teamDrive',
                u'teamDriveId': self.shared_drive_id,
                u'includeTeamDriveItems': True,
                u'supportsTeamDrives': True
            }

        # Prefer pydrive2; fall back to the legacy pydrive package.
        try:
            from pydrive2.auth import GoogleAuth
            from pydrive2.drive import GoogleDrive
            from pydrive2.files import ApiRequestError, FileNotUploadedError
        except ImportError as e:
            try:
                from pydrive.auth import GoogleAuth
                from pydrive.drive import GoogleDrive
                from pydrive.files import ApiRequestError, FileNotUploadedError
            except ImportError as e:
                raise BackendException(u"""\
PyDrive backend requires PyDrive installation.  Please read the manpage for setup details.
Exception: %s""" % str(e))

        # let user get by with old client while he can
        try:
            from oauth2client.client import SignedJwtAssertionCredentials
            self.oldClient = True
        except:
            from oauth2client.service_account import ServiceAccountCredentials
            from oauth2client import crypt
            self.oldClient = False

        # Auth strategy 1: raw service-account key from the environment.
        if u'GOOGLE_DRIVE_ACCOUNT_KEY' in os.environ:
            account_key = os.environ[u'GOOGLE_DRIVE_ACCOUNT_KEY']
            if self.oldClient:
                credentials = SignedJwtAssertionCredentials(
                    parsed_url.username + u'@' + parsed_url.hostname,
                    account_key,
                    scopes=u'https://www.googleapis.com/auth/drive')
            else:
                signer = crypt.Signer.from_string(account_key)
                credentials = ServiceAccountCredentials(
                    parsed_url.username + u'@' + parsed_url.hostname,
                    signer,
                    scopes=u'https://www.googleapis.com/auth/drive')
            credentials.authorize(httplib2.Http())
            gauth = GoogleAuth(http_timeout=60)
            gauth.credentials = credentials
        # Auth strategy 2: a PyDrive settings file.
        elif u'GOOGLE_DRIVE_SETTINGS' in os.environ:
            gauth = GoogleAuth(
                settings_file=os.environ[u'GOOGLE_DRIVE_SETTINGS'],
                http_timeout=60)
            gauth.CommandLineAuth()
        # Auth strategy 3: separate client-secrets and saved-credentials files.
        elif (u'GOOGLE_SECRETS_FILE' in os.environ
              and u'GOOGLE_CREDENTIALS_FILE' in os.environ):
            gauth = GoogleAuth(http_timeout=60)
            gauth.LoadClientConfigFile(os.environ[u'GOOGLE_SECRETS_FILE'])
            gauth.LoadCredentialsFile(os.environ[u'GOOGLE_CREDENTIALS_FILE'])
            if gauth.credentials is None:
                gauth.CommandLineAuth()
            elif gauth.access_token_expired:
                gauth.Refresh()
            else:
                gauth.Authorize()
            gauth.SaveCredentialsFile(os.environ[u'GOOGLE_CREDENTIALS_FILE'])
        else:
            raise BackendException(
                u'GOOGLE_DRIVE_ACCOUNT_KEY or GOOGLE_DRIVE_SETTINGS environment '
                u'variable not set. Please read the manpage to fix.')
        self.drive = GoogleDrive(gauth)

        if self.shared_drive_id:
            parent_folder_id = self.shared_drive_id
        else:
            # Dirty way to find root folder id
            file_list = self.drive.ListFile({
                u'q':
                u"'Root' in parents and trashed=false"
            }).GetList()
            if file_list:
                parent_folder_id = file_list[0][u'parents'][0][u'id']
            else:
                # Empty drive: create and delete a probe file to learn the
                # root folder's id from its parent reference.
                file_in_root = self.drive.CreateFile(
                    {u'title': u'i_am_in_root'})
                file_in_root.Upload()
                parent_folder_id = file_in_root[u'parents'][0][u'id']
                file_in_root.Delete()

        # Fetch destination folder entry and create hierarchy if required.
        folder_names = parsed_url.path.split(u'/')
        for folder_name in folder_names:
            if not folder_name:
                continue
            list_file_args = {
                u'q':
                u"'" + parent_folder_id + u"' in parents and trashed=false"
            }
            list_file_args.update(self.api_params)
            file_list = self.drive.ListFile(list_file_args).GetList()
            folder = next(
                (item
                 for item in file_list if item[u'title'] == folder_name and
                 item[u'mimeType'] == u'application/vnd.google-apps.folder'),
                None)
            if folder is None:
                create_file_args = {
                    u'title': folder_name,
                    u'mimeType': u"application/vnd.google-apps.folder",
                    u'parents': [{
                        u'id': parent_folder_id
                    }]
                }
                create_file_args[u'parents'][0].update(self.api_params)
                create_file_args.update(self.api_params)
                folder = self.drive.CreateFile(create_file_args)
                if self.shared_drive_id:
                    folder.Upload(param={u'supportsTeamDrives': True})
                else:
                    folder.Upload()
            parent_folder_id = folder[u'id']
        self.folder = parent_folder_id
        # filename -> Drive file id cache; validated before use because
        # remote files may have been moved, renamed or deleted.
        self.id_cache = {}

    def file_by_name(self, filename):
        u"""Return the GoogleDriveFile for *filename* in the backend folder,
        consulting (and validating) the id cache first; None if absent."""
        try:
            from pydrive2.files import ApiRequestError  # pylint: disable=import-error
        except ImportError:
            from pydrive.files import ApiRequestError  # pylint: disable=import-error

        filename = util.fsdecode(
            filename)  # PyDrive deals with unicode filenames

        if filename in self.id_cache:
            # It might since have been locally moved, renamed or deleted, so we
            # need to validate the entry.
            file_id = self.id_cache[filename]
            drive_file = self.drive.CreateFile({u'id': file_id})
            try:
                if drive_file[u'title'] == filename and not drive_file[
                        u'labels'][u'trashed']:
                    for parent in drive_file[u'parents']:
                        if parent[u'id'] == self.folder:
                            log.Info(
                                u"PyDrive backend: found file '%s' with id %s in ID cache"
                                % (filename, file_id))
                            return drive_file
            except ApiRequestError as error:
                # A 404 occurs if the ID is no longer valid
                if error.args[0].resp.status != 404:
                    raise
            # If we get here, the cache entry is invalid
            log.Info(
                u"PyDrive backend: invalidating '%s' (previously ID %s) from ID cache"
                % (filename, file_id))
            del self.id_cache[filename]

        # Not found in the cache, so use directory listing. This is less
        # reliable because there is no strong consistency.
        q = u"title='%s' and '%s' in parents and trashed=false" % (filename,
                                                                   self.folder)
        fields = u'items(title,id,fileSize,downloadUrl,exportLinks),nextPageToken'
        list_file_args = {u'q': q, u'fields': fields}
        list_file_args.update(self.api_params)
        flist = self.drive.ListFile(list_file_args).GetList()
        if len(flist) > 1:
            log.FatalError(
                _(u"PyDrive backend: multiple files called '%s'.") %
                (filename, ))
        elif flist:
            file_id = flist[0][u'id']
            self.id_cache[filename] = flist[0][u'id']
            log.Info(u"PyDrive backend: found file '%s' with id %s on server, "
                     u"adding to cache" % (filename, file_id))
            return flist[0]
        log.Info(
            u"PyDrive backend: file '%s' not found in cache or on server" %
            (filename, ))
        return None

    def id_by_name(self, filename):
        u"""Return the Drive id for *filename*, or the empty string."""
        drive_file = self.file_by_name(filename)
        if drive_file is None:
            return u''
        else:
            return drive_file[u'id']

    def _put(self, source_path, remote_filename):
        u"""Upload *source_path*, creating or replacing *remote_filename*."""
        remote_filename = util.fsdecode(remote_filename)
        drive_file = self.file_by_name(remote_filename)
        if drive_file is None:
            # No existing file, make a new one
            create_file_args = {
                u'title': remote_filename,
                u'parents': [{
                    u"kind": u"drive#fileLink",
                    u"id": self.folder
                }]
            }
            create_file_args[u'parents'][0].update(self.api_params)
            drive_file = self.drive.CreateFile(create_file_args)
            log.Info(u"PyDrive backend: creating new file '%s'" %
                     (remote_filename, ))
        else:
            log.Info(
                u"PyDrive backend: replacing existing file '%s' with id '%s'" %
                (remote_filename, drive_file[u'id']))
        drive_file.SetContentFile(util.fsdecode(source_path.name))
        if self.shared_drive_id:
            drive_file.Upload(param={u'supportsTeamDrives': True})
        else:
            drive_file.Upload()
        self.id_cache[remote_filename] = drive_file[u'id']

    def _get(self, remote_filename, local_path):
        u"""Download *remote_filename* into *local_path*."""
        drive_file = self.file_by_name(remote_filename)
        drive_file.GetContentFile(util.fsdecode(local_path.name))

    def _list(self):
        u"""Return the filenames in the backend folder, merged with any
        cached names not yet visible in the listing."""
        list_file_args = {
            u'q': u"'" + self.folder + u"' in parents and trashed=false",
            u'fields': u'items(title,id),nextPageToken'
        }
        list_file_args.update(self.api_params)
        drive_files = self.drive.ListFile(list_file_args).GetList()
        filenames = set(item[u'title'] for item in drive_files)
        # Check the cache as well. A file might have just been uploaded but
        # not yet appear in the listing.
        # Note: do not use iterkeys() here, because file_by_name will modify
        # the cache if it finds invalid entries.
        for filename in list(self.id_cache.keys()):
            if (filename not in filenames) and (self.file_by_name(filename)
                                                is not None):
                filenames.add(filename)
        return list(filenames)

    def _delete(self, filename):
        u"""Delete *filename* from the backend folder (warn if missing)."""
        file_id = self.id_by_name(filename)
        if file_id == u'':
            log.Warn(u"File '%s' does not exist while trying to delete it" %
                     (util.fsdecode(filename), ))
        elif self.shared_drive_id:
            self.drive.auth.service.files().delete(fileId=file_id,
                                                   param={
                                                       u'supportsTeamDrives':
                                                       True
                                                   }).execute()
        else:
            self.drive.auth.service.files().delete(fileId=file_id).execute()

    def _query(self, filename):
        u"""Return {'size': n} for *filename*, with -1 when it is absent."""
        drive_file = self.file_by_name(filename)
        if drive_file is None:
            size = -1
        else:
            size = int(drive_file[u'fileSize'])
        return {u'size': size}

    def _error_code(self, operation, error):  # pylint: disable=unused-argument
        u"""Map PyDrive exceptions onto duplicity backend error codes."""
        try:
            from pydrive2.files import ApiRequestError, FileNotUploadedError  # pylint: disable=import-error
        except ImportError:
            from pydrive.files import ApiRequestError, FileNotUploadedError  # pylint: disable=import-error
        if isinstance(error, FileNotUploadedError):
            return log.ErrorCode.backend_not_found
        elif isinstance(error, ApiRequestError):
            return log.ErrorCode.backend_permission_denied
        return log.ErrorCode.backend_error
Ejemplo n.º 12
0
class GoogleDriveStorage(Storage):
    """Storage backend backed by a single Google Drive folder.

    ``connect`` resolves the folder named *drive_path* once and caches its
    id; every later operation uses that id as the parent for uploads,
    lookups and listings.
    """

    # Google Drive v2 query templates ('title' is the v2 name field).
    QUERY_FOLDER = 'title=\'{}\' and trashed=false and mimeType=\'{}\''
    QUERY_FILE_BY_NAME = 'title=\'{}\' and trashed=false and \'{}\' in parents'
    QUERY_FILE_LIST_IN_FOLDER = 'trashed=false and \'{}\' in parents'

    MIME_TYPE_FOLDER = 'application/vnd.google-apps.folder'

    def __init__(self, drive_path, drive_config):
        # Lazily initialised by connect(); None means "not connected yet".
        self._storage = None
        self._drive_path = drive_path
        self._drive_path_id = None
        self._credentials_path = drive_config['credentials-path']

        super().__init__()

    def connect(self):
        """Authenticate and resolve the root folder id (idempotent)."""
        if self._storage is None:
            self._storage = GoogleDrive(self.__authenticate())
            self._drive_path_id = self.__get_drive_path_id()

    def put(self, key_path, file_path):
        """Upload local *file_path* to Drive under the name *key_path*.

        Returns True on success, False when the local file is missing;
        raises RuntimeError when the upload itself fails.
        """
        if not os.path.exists(file_path):
            log.error(output_messages['ERROR_NOT_FOUND'] % file_path,
                      class_name=GDRIVE_STORAGE)
            return False

        file_metadata = {
            'title': key_path,
            'parents': [{
                'id': self._drive_path_id
            }]
        }
        try:
            self.__upload_file(file_path, file_metadata)
        except Exception:
            raise RuntimeError(
                output_messages['ERROR_FILE_COULD_NOT_BE_UPLOADED'] %
                file_path)

        return True

    @_should_retry
    def __upload_file(self, file_path, file_metadata):
        """Upload a single file (retried on transient API errors)."""
        file = self._storage.CreateFile(metadata=file_metadata)
        file.SetContentFile(file_path)
        file.Upload()

    def get(self, file_path, reference):
        """Download the file named *reference* into local *file_path*."""
        file_info = self.get_file_info_by_name(reference)

        if not file_info:
            log.error(output_messages['ERROR_NOT_FOUND'] % reference,
                      class_name=GDRIVE_STORAGE)
            return False

        self.download_file(file_path, file_info)
        return True

    def get_by_id(self, file_path, file_id):
        """Download a file (or folder) by Drive id into directory *file_path*."""
        try:
            file_info = self._storage.CreateFile({'id': file_id})
            file_info.FetchMetadata(fields='id,mimeType,title')
        except errors.HttpError as error:
            log.error(error, class_name=GDRIVE_STORAGE)
            return False

        if not file_info:
            log.error(output_messages['ERROR_NOT_FOUND'] % file_id,
                      class_name=GDRIVE_STORAGE)
            return False

        # The remote title decides the local file name.
        file_path = os.path.join(file_path, file_info.get('title'))
        self.download_file(file_path, file_info)
        return True

    @_should_retry
    def _download_file(self, id, file_path):
        """Fetch one (non-folder) file's content (retried)."""
        file = self._storage.CreateFile({'id': id})
        file.GetContentFile(file_path)

    def download_file(self, file_path, file_info):
        """Download *file_info*; folders are fetched recursively."""
        file_id = file_info.get('id')
        if file_info.get('mimeType') == self.MIME_TYPE_FOLDER:
            self.__download_folder(file_path, file_id)
            return

        self._download_file(file_id, file_path)

    @_should_retry
    def get_file_info_by_name(self, file_name):
        """Return the metadata dict of *file_name* in the root folder, or None."""
        query = {
            'q': self.QUERY_FILE_BY_NAME.format(file_name,
                                                self._drive_path_id),
            'maxResults': 1
        }

        file_list = self._storage.ListFile(query).GetList()
        if file_list:
            return file_list.pop()

        return None

    def __authenticate(self):
        """Run the OAuth flow, reusing (and refreshing) cached credentials."""
        credentials_full_path = os.path.join(self._credentials_path,
                                             'credentials.json')
        token = os.path.join(self._credentials_path, 'credentials')

        gauth = GoogleAuth()
        gauth.LoadClientConfigFile(credentials_full_path)

        if os.path.exists(token):
            gauth.LoadCredentialsFile(token)

        cred = gauth.credentials
        if not cred or cred.invalid:
            if cred and cred.access_token_expired and cred.refresh_token:
                gauth.Refresh()
            else:
                # No usable token: fall back to the interactive browser flow.
                gauth.LocalWebserverAuth()
            gauth.SaveCredentialsFile(token)
        return gauth

    def bucket_exists(self):
        """True when connect() resolved the configured root folder."""
        if self._drive_path_id:
            return True
        return False

    def __get_drive_path_id(self):
        """Look up the id of the configured root folder; None when absent."""
        query = {
            'q': self.QUERY_FOLDER.format(self._drive_path,
                                          self.MIME_TYPE_FOLDER),
            'maxResults': 1
        }
        try:
            bucket = self._storage.ListFile(query).GetList()
            if not bucket:
                return None
            return bucket.pop()['id']
        except ApiRequestError as e:
            log.debug(e, class_name=GDRIVE_STORAGE)

        return None

    def key_exists(self, key_path):
        """True when a file named *key_path* exists in the root folder."""
        file_info = self.get_file_info_by_name(key_path)
        if file_info:
            return True
        return False

    def list_files_from_path(self, path):
        """Return the titles of matching files in the root folder.

        NOTE(review): this uses QUERY_FILE_BY_NAME, so it matches an exact
        title rather than a path prefix -- confirm that is intended.
        """
        if not self._drive_path_id:
            raise RuntimeError(output_messages['ERROR_BUCKET_NOT_FOUND'] %
                               self._drive_path)
        files_in_folder = self._storage.ListFile({
            'q':
            self.QUERY_FILE_BY_NAME.format(path, self._drive_path_id)
        }).GetList()
        return [file.get('title') for file in files_in_folder]

    def list_files_in_folder(self, parent_id):
        """Return metadata for every non-trashed child of *parent_id*."""
        return self._storage.ListFile({
            'q':
            self.QUERY_FILE_LIST_IN_FOLDER.format(parent_id)
        }).GetList()

    def __download_folder(self, file_path, folder_id):
        """Recursively download a folder's children into directory *file_path*."""
        files_in_folder = self.list_files_in_folder(folder_id)
        for file in files_in_folder:
            complete_file_path = os.path.join(file_path, file.get('title'))
            ensure_path_exists(file_path)
            self.download_file(complete_file_path, file)

    def import_file_from_url(self, path_dst, url):
        """Download the file referenced by a Drive share *url* into *path_dst*."""
        file_id = self.get_file_id_from_url(url)
        if not file_id:
            raise RuntimeError(output_messages['ERROR_INVALID_URL'] % url)
        if not self.get_by_id(path_dst, file_id):
            raise RuntimeError(output_messages['ERROR_FILE_DOWNLOAD_FAILED'] %
                               file_id)

    @staticmethod
    def get_file_id_from_url(url):
        """Extract a Drive file/folder id from a share URL, or return None.

        Handles ``?id=...`` query strings, ``.../folders/<id>`` paths, and
        ``.../d/<id>/view``-style paths (second-to-last path segment).
        """
        url_parsed = urlparse(url)
        query = parse_qs(url_parsed.query)
        query_file_id = query.get('id', [])
        if query_file_id:
            return query_file_id[0]
        url_parts = url_parsed.path.split('/')
        folder = 'folders'
        min_size = 2
        if folder in url_parts:
            file_id_index = url_parts.index(folder) + 1
            return url_parts[file_id_index]
        if len(url_parts) > min_size:
            return url_parts[-2]
        return None
class DriveHandler:
    """Thin PyDrive wrapper for uploading images/logs and pruning dated folders."""

    def __init__(self):
        """Authenticate against Google Drive, reusing cached credentials when possible."""
        gauth = GoogleAuth()
        gauth.LoadCredentialsFile("google_credentials.txt")
        if gauth.credentials is None:
            # No cached credentials: run the interactive browser flow.
            gauth.LocalWebserverAuth()
        elif gauth.access_token_expired:
            gauth.Refresh()
        else:
            gauth.Authorize()
        gauth.SaveCredentialsFile("google_credentials.txt")
        self.drive = GoogleDrive(gauth)

    def refresh_drive(self, current_date):
        """Permanently delete trashed folders, then trash folders titled with
        the ISO dates of the five days preceding *current_date*."""
        logging.info("Refreshing the image contents of the drive...")
        folder_list = self.drive.ListFile(
            {"q": "mimeType='application/vnd.google-apps.folder' and trashed=true"}
        ).GetList()
        for folder in folder_list:
            folder_to_delete = self.drive.CreateFile({"id": folder["id"]})
            print(folder_to_delete)
            folder_to_delete.Delete()
        folders_to_trash = []
        date_modifier = timedelta(days=1)
        folder_list = self.drive.ListFile(
            {"q": "mimeType='application/vnd.google-apps.folder' and trashed=false"}
        ).GetList()
        # Folders are titled with ISO dates; collect the five previous days.
        for _ in range(5):
            current_date -= date_modifier
            date_str = str(current_date)
            for folder in folder_list:
                if folder["title"] == date_str:
                    folders_to_trash.append(folder["id"])
        for folder_id in folders_to_trash:
            self.drive.CreateFile({"id": folder_id}).Trash()

    def _find_or_create_folder(self, directory):
        """Return the id of the non-trashed folder *directory*, creating it if absent."""
        folder_list = self.drive.ListFile(
            {"q": "title='%s' and mimeType='application/vnd.google-apps.folder' and trashed=false" % directory}
        ).GetList()
        for folder in folder_list:
            if folder["title"] == directory:
                return folder["id"]
        new_folder = self.drive.CreateFile({"title": directory, "mimeType": "application/vnd.google-apps.folder"})
        new_folder.Upload()
        return new_folder["id"]

    def upload(self, directory, filename):
        """Upload local *filename* into Drive folder *directory* (created when
        needed), then delete the local copy."""
        logging.info("%s is being uploaded to the drive in folder %s." % (filename, directory))
        folder_id = self._find_or_create_folder(directory)
        new_file = self.drive.CreateFile({"title": filename, "parents": [{"id": folder_id}]})
        new_file.SetContentFile(filename)
        new_file.Upload()
        remove(filename)

    def upload_log(self):
        """Upload app.log as a timestamped file into the 'logs' folder, then truncate it."""
        logging.debug("Uploading the latest logs to the drive...")
        # Fix: previously this crashed with a TypeError (log_folder stayed
        # None) when no 'logs' folder existed yet; create it on demand.
        log_folder_id = self._find_or_create_folder("logs")
        new_log = self.drive.CreateFile(
            {"title": "%s.log" % get_formatted_time(True),
             "parents": [{"id": log_folder_id}]}
        )
        new_log.SetContentFile("app.log")
        new_log.Upload()
        # Truncate the local log after a successful upload.
        with open("app.log", "w"):
            pass

    def refresh_logs(self, current_date):
        """Print the titles of the log files currently on the drive."""
        logging.info("Refreshing the logs on the drive...")
        # Fix: the Drive query language has no globbing, so "title='*.log'"
        # matched nothing; use the 'contains' operator instead.
        log_list = self.drive.ListFile(
            {"q": "title contains '.log' and trashed=false"}
        ).GetList()
        for log in log_list:
            print(log["title"])
Ejemplo n.º 14
0
file_paths = retrieve_file_paths(dir_name)

# Name today's archive YYYY-MM-DD.zip inside the backup directory.
today = date.today().strftime("%Y-%m-%d")

zip_path = os.path.join(dir_name, today + '.zip')

print(f'Backing up to {today}.zip')

try:
    # Write the archive; the context manager guarantees it is closed
    # (and therefore flushed) before it is uploaded below.
    with zipfile.ZipFile(zip_path, 'w') as zip_file:
        for file in file_paths:
            zip_file.write(file)

    # Rotate: keep only the newest REMAIN zip archives in the Drive root
    # (ISO-dated titles sort lexicographically, newest first after reverse).
    file_list = drive.ListFile({'q': 'title contains ".zip" and trashed=false and "root" in parents'}).GetList()
    file_list = sorted(file_list, key=lambda i: i['title'], reverse=True)
    for file in file_list[REMAIN:]:
        file.Delete()

    backup_file = drive.CreateFile({'title': today + '.zip'})
    backup_file.SetContentFile(zip_path)
    backup_file.Upload()

    print('Done')
except Exception as e:
    # Fix: the previous bare `except:` printed only the exception class
    # (sys.exc_info()[0]) and also swallowed KeyboardInterrupt/SystemExit.
    print(e)
    print('Failed')
Ejemplo n.º 15
0
# pydrive2 is an actively maintained fork of PyDrive.
from pydrive2.drive import GoogleDrive
from pydrive2.auth import GoogleAuth, ServiceAccountCredentials

# Authenticate with a service account. The broad "drive" scope is handy for
# sanity checking, but drive.file is the right scope for production use:
# https://developers.google.com/drive/api/v3/about-auth
gauth = GoogleAuth()
gauth.credentials = ServiceAccountCredentials.from_json_keyfile_name(
    'google-credentials.json', ['https://www.googleapis.com/auth/drive'])
drive = GoogleDrive(gauth)

# Iterating ListFile with maxResults yields one page (a list) per iteration.
for page in drive.ListFile({'maxResults': 10}):
    print('Received {} files from Files.list()'.format(len(page)))  # <= 10
    for entry in page:
        print('title: {}, id: {}, mime: {}'.format(entry['title'], entry['id'],
                                                   entry['mimeType']))
Ejemplo n.º 16
0
class gDrive:
    """Google Drive helper: rebuilds the OAuth credential files PyDrive expects
    from environment variables, keeps the token fresh, and loads a weekly class
    schedule from a Classes.xlsx spreadsheet stored on the drive."""

    def __init__(self):
        ## State filled in later.
        self.lastScheduleUpdateTime = datetime.datetime.min
        self.scheduleData = []

        ## Pull OAuth material and the drive root folder name from the environment.
        self.root = LOAD_ENV_VARS.ENV_VARS['root']
        self.access_token = LOAD_ENV_VARS.ENV_VARS['gd_access_token']
        self.client_secret = LOAD_ENV_VARS.ENV_VARS['gd_client_secret']
        self.client_id = LOAD_ENV_VARS.ENV_VARS['gd_client_id']
        self.refresh_token = LOAD_ENV_VARS.ENV_VARS['gd_refresh_token']
        self.token_expiry = LOAD_ENV_VARS.ENV_VARS['gd_token_expiry']

        ## Rebuild the cached-credentials file from the environment variables.
        # NOTE(review): the %s placeholders are unquoted, so the result is valid
        # JSON only if the env values already carry their own quotes -- confirm
        # before changing. (Fix: the former `if True:` wrapper and its
        # unreachable `else` branch were removed; file handles now use `with`.)
        Text = """{"access_token": %s, "client_id": %s, "client_secret": %s, "refresh_token": %s, "token_expiry": %s, "token_uri": "https://oauth2.googleapis.com/token", "user_agent": null, "revoke_uri": "https://oauth2.googleapis.com/revoke", "id_token": null, "id_token_jwt": null, "token_response": {"access_token": %s, "expires_in": 3600, "refresh_token": %s, "scope": "https://www.googleapis.com/auth/drive", "token_type": "Bearer"}, "scopes": ["https://www.googleapis.com/auth/drive"], "token_info_uri": "https://oauth2.googleapis.com/tokeninfo", "invalid": false, "_class": "OAuth2Credentials", "_module": "oauth2client.client"}""" % (
            self.access_token, self.client_id, self.client_secret,
            self.refresh_token, self.token_expiry, self.access_token,
            self.refresh_token)
        with open("mycreds.txt", "w+") as f:
            f.write(Text)
        ## Rebuild the client-secrets file from the environment variables.
        Text = '''{"installed":{"client_id":%s,"project_id":"quickstart-1564436220867","auth_uri":"https://accounts.google.com/o/oauth2/auth","token_uri":"https://oauth2.googleapis.com/token","auth_provider_x509_cert_url":"https://www.googleapis.com/oauth2/v1/certs","client_secret":%s,"redirect_uris":["urn:ietf:wg:oauth:2.0:oob","http://localhost"]}}''' % (
            self.client_id, self.client_secret)
        with open("client_secrets.json", "w+") as f:
            f.write(Text)

        # Try to load saved client credentials
        gauth = GoogleAuth()
        gauth.LoadCredentialsFile("mycreds.txt")
        if gauth.credentials is None:
            # Authenticate if they're not there
            gauth.LocalWebserverAuth()
        elif gauth.access_token_expired:
            # Refresh them if expired
            gauth.Refresh()
        else:
            # Initialize the saved creds
            gauth.Authorize()
        # Save the current credentials to a file
        gauth.SaveCredentialsFile("mycreds.txt")
        self.UpdateEnvVars()
        # Initialize the drive object.
        self.drive = GoogleDrive(gauth)
        ## Load the schedule once up front.
        self.loadSchedule()

    def loadSchedule(self):
        """Download Classes.xlsx from the drive and parse it into self.scheduleData."""
        self.lastScheduleUpdateTime = datetime.datetime.now()
        SpreadsheetFile = self.FindOrCreateFolder(['Classes.xlsx'])

        Spreadsheet = self.drive.CreateFile({'id': SpreadsheetFile['id']})
        Spreadsheet.GetContentFile('Classes.xlsx')
        wb = openpyxl.load_workbook('Classes.xlsx')
        DataSheet = wb['Classes']
        Row = 2
        self.scheduleData = []
        # Sheet layout: col 1 = class name, 2 = meeting days, 3 = start, 4 = end.
        while DataSheet.cell(row=Row, column=1).value:
            classDays = DataSheet.cell(row=Row, column=2).value
            struct = {
                'className': (DataSheet.cell(row=Row, column=1).value),
                'startTime': (DataSheet.cell(row=Row, column=3).value),
                'endTime': (DataSheet.cell(row=Row, column=4).value),
                # Encode day abbreviations as weekday digits (Mon=0 .. Sun=6).
                'classDays':
                classDays.replace('Su', '6').replace('M', '0').replace(
                    'Tu', '1').replace('W', '2').replace('Th', '3').replace(
                        'F', '4').replace('Sa', '5')
            }
            self.scheduleData.append(struct)
            Row = Row + 1
        print(self.scheduleData)

    def checkClasses(self, message):
        """Return the names of classes in session at the message's timestamp,
        refreshing the cached schedule when it is more than an hour old."""
        now = datetime.datetime.fromtimestamp(message['created_at'])
        timeNow = now.time()
        dateNow = now.date()
        if ((now - self.lastScheduleUpdateTime) > datetime.timedelta(
                days=0, hours=1, minutes=0)):
            self.loadSchedule()

        messageScheduleList = []
        for Class in self.scheduleData:
            inClassTime = ((Class['startTime'] < timeNow)
                           and (timeNow < Class['endTime']))
            onClassDay = (str(dateNow.weekday()) in Class['classDays'])
            if inClassTime and onClassDay:
                messageScheduleList.append(Class['className'])

        return messageScheduleList

    def UpdateEnvVars(self):
        """Re-export the fields of mycreds.txt as GD_* environment variables.

        NOTE(review): relies on the exact quote positions in the JSON file
        (split on '"'); fragile if the file layout ever changes.
        """
        with open('mycreds.txt', 'r') as f:
            c = f.read()
        s = c.split('"')
        self.gd_access_token = s[3]
        self.gd_client_secret = s[11]
        self.gd_client_id = s[7]
        self.gd_refresh_token = s[15]
        self.gd_token_expiry = s[19]
        os.environ["GD_ACCESS_TOKEN"] = self.gd_access_token
        os.environ["GD_CLIENT_SECRET"] = self.gd_client_secret
        os.environ["GD_CLIENT_ID"] = self.gd_client_id
        os.environ["GD_REFRESH_TOKEN"] = self.gd_refresh_token
        os.environ["GD_TOKEN_EXPIRY"] = self.gd_token_expiry

    def FindOrCreateFolder(self, folderNames):
        """Walk (and create as needed) the chain self.root/<folderNames...>,
        returning the final GoogleDriveFile. Note: mutates *folderNames*."""
        parent_id = 'root'
        folderNames.insert(0, self.root)
        for folderName in folderNames:
            search_list = []
            file_list = self.drive.ListFile({
                'q':
                "'%s' in parents and trashed=false" % (parent_id)
            }).GetList()
            for file in file_list:
                if (file['title'] == folderName):
                    search_list.append(file)
            if len(search_list) == 0:
                # Create folder.
                folder_metadata = {
                    'title': folderName,
                    # The mimetype defines this new file as a folder, so don't change this.
                    'mimeType': 'application/vnd.google-apps.folder',
                    "parents": [{
                        "kind": "drive#fileLink",
                        "id": parent_id
                    }]
                }
                folder = self.drive.CreateFile(folder_metadata)
                folder.Upload()

            else:
                folder = search_list[0]
            ## Descend into the folder just found/created.
            parent_id = folder['id']
        return (folder)

    def UploadFile(self, path, folderNames):
        """Upload local file *path* into the (created-if-needed) folder chain *folderNames*."""
        # Fix: previously referenced the misspelled name 'fodlerNames', which
        # raised NameError every time this method was called.
        folder = self.FindOrCreateFolder(folderNames)
        f = self.drive.CreateFile({
            "parents": [{
                "kind": "drive#fileLink",
                "id": folder['id']
            }],
            'title':
            path
        })
        f.SetContentFile(path)
        f.Upload()
        print('Uploaded!')
Ejemplo n.º 17
0
dir_name = '/appdata'

gauth = GoogleAuth()

# Reuse cached credentials when possible.
# Fix: previously there was no `credentials is None` branch, so a missing or
# empty mycreds.txt made Refresh()/Authorize() fail on the first run; fall
# back to the interactive browser flow instead (consistent with the other
# PyDrive examples in this file).
gauth.LoadCredentialsFile("/credentials/mycreds.txt")
if gauth.credentials is None:
    gauth.LocalWebserverAuth()
elif gauth.access_token_expired:
    gauth.Refresh()
else:
    gauth.Authorize()

gauth.SaveCredentialsFile("/credentials/mycreds.txt")

drive = GoogleDrive(gauth)

# Pick the newest backup archive: ISO-dated titles sort lexicographically,
# so the last element after sorting is the most recent one.
file_list = drive.ListFile({
    'q': 'title contains ".zip" and trashed=false'
}).GetList()
file_list = sorted(file_list, key=lambda i: i['title'])
file_id = file_list[-1]['id']
file_name = file_list[-1]['title']

print(f'Downloading {file_name}')
file = drive.CreateFile({'id': file_id})
zip_path = os.path.join(dir_name, file_name)

file.GetContentFile(zip_path)

print(f'Restoring {file_name}')

with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall()
Ejemplo n.º 18
0
class GoogleDriveStorage(StorageInterface):
    """Google Drive implementation of StorageInterface: uploads files into a
    fixed root folder, updating them in place when the local copy changed."""

    DEFAULT_DESTINATION_ROOT = "testDir"

    def __init__(self, destination_root_directory=DEFAULT_DESTINATION_ROOT):
        """
        Initialize logging and Google Drive authentication.
        """

        # Enable logging
        logging.basicConfig(
            format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
            level=logging.INFO)

        self.logger = logging.getLogger(__name__)

        # self.destination_root_directory = destination_root_directory
        self.destination_root_directory = self.DEFAULT_DESTINATION_ROOT
        # TODO make directory name configurable
        # this will require a parsing of the directory path
        # all concerned subfolders will have to be looked up recursively in the file_exists function

        gauth = GoogleAuth()
        gauth.LocalWebserverAuth()
        self.drive = GoogleDrive(gauth)

    def file_exists(self, file_name, parent_folder_id):
        """
        Checks whether a file exists in the respective parent folder.\n
        Returns its GoogleDriveFile object, otherwise None.
        """
        file_list = self.drive.ListFile({'q': 'trashed=false'}).GetList()
        for item in file_list:
            # Fix: items without any parent (e.g. files shared with this
            # account) used to raise an IndexError on ['parents'][0].
            if not item['parents']:
                continue
            parent_id = item['parents'][0]['id']
            if parent_id == parent_folder_id and item['title'] == file_name:
                return item
        return None

    def get_root_folder_id(self, parent_folder_name):
        """Return the Drive id of *parent_folder_name*, creating the folder on
        first use."""
        # Fix: folder_id was referenced before assignment (UnboundLocalError)
        # when no folder in the root matched parent_folder_name.
        folder_id = None
        file_list = self.drive.ListFile({
            'q':
            "'root' in parents and trashed=false"
        }).GetList()
        for folder in file_list:
            if folder['title'] == parent_folder_name:
                folder_id = folder['id']
                break

        # create root folder if not existing
        if folder_id is None:
            folder = self.drive.CreateFile({
                'title':
                parent_folder_name,
                "mimeType":
                "application/vnd.google-apps.folder"
            })
            folder.Upload()
            folder_id = folder['id']
        return folder_id

    def upload(self, local_file_name):
        """Upload *local_file_name* into the destination root folder; when a
        remote copy already exists, update it only if the md5 sums differ."""
        folder_id = self.get_root_folder_id(self.destination_root_directory)

        file = self.file_exists(local_file_name, folder_id)
        if file is None:
            logging.info("uploading new file")
            f = self.drive.CreateFile(
                {"parents": [{
                    "kind": "drive#fileLink",
                    "id": folder_id
                }]})
            f.SetContentFile(local_file_name)
            f.Upload()
        else:
            # file already exists
            file_md5 = file['md5Checksum']

            # check if md5sums differ => update the remote file
            local_md5 = get_md5_sum(local_file_name)
            if file_md5 == local_md5:
                logging.info("file is already up to date")
            else:
                logging.info("updating existing file..")
                # update remote file instead of uploading a copy
                file.SetContentFile(local_file_name)
                file.Upload()
                logging.info("remote file updated")

    def upload_multiple(self, files):
        """
        Sequentially uploads multiple files to your Google Drive backend.
        """
        # TODO use multiple threads to do this concurrently
        for file in files:
            self.upload(file)
Ejemplo n.º 19
0
def _list_file(folder_path: List[str], drive: GoogleDrive) -> Tuple[str, list]:
    """Resolve *folder_path* to a folder id and list its non-trashed children."""
    folder_id = _create_or_find_folder(folder_path, drive)
    query = {'q': f"'{folder_id}' in parents and trashed=false"}
    return folder_id, drive.ListFile(query).GetList()
Ejemplo n.º 20
0
from pydrive2.auth import GoogleAuth
from pydrive2.drive import GoogleDrive
import os
import pprint

# Authenticate: spins up a local webserver and handles the OAuth flow.
gauth = GoogleAuth()

# Creates local webserver and auto handles authentication.
gauth.LocalWebserverAuth()

# Build a GoogleDrive client from the authenticated session.
drive = GoogleDrive(gauth)
f = drive.CreateFile()
print('before',f)

# Attach the local image and title it after its basename.
f.SetContentFile('./images/onepiece01_luffy.png')
p = os.path.basename('./images/onepiece01_luffy.png')
f['title'] = p
print('after',f)

# Fix: the file's content and title were set but Upload() was never called,
# so the title lookup below could only succeed if an earlier run had
# already uploaded the file.
f.Upload()

# Look the file up by title and dump its id, title and permissions.
id = drive.ListFile({'q':'title= "onepiece01_luffy.png"'}).GetList()[0]['id']
f = drive.CreateFile({'id':id})
print('id: ',f['id'])
print('title: ',f['title'])
permissions = f.GetPermissions()
print(permissions)
Ejemplo n.º 21
0
class Bot:

    #Within this init function, we connect to various APIs and set user defined variables for the methods.
    def __init__(self, configfile = None,  **kwargs):

        #These are the default values for our Bot.
        defaults = {"name":"OpenMediaBot","db":"media.db","gdrive_settings":"settings.yaml"}
        
        #Update the attributes defaults dictionary with any kwargs provided by the user.
        #Since a dictionary does not allow duplicate keys, kwargs provided by the user that were previously set in the default dict will override thier default values.
        defaults.update(kwargs)

        self.__dict__.update(defaults)

        #If a configfile is provided, we will use the values in this file to override any default values or kwargs provided in the attributes dict.
        if configfile is not None:
            
            with open(configfile) as jsonfile:
                
                import json
                
                self.__dict__.update(json.load(jsonfile))

        #We use the bot name as the name for our SQL table, this allows us to house multiple bots in the same databse file.
        #We have to  make sure we sanitize th name of the bot in order to prevent SQL inquection attacks.
        if re.search("[\";\+=()\']", self.name) is not None:
            raise ValueError("The characters [\";\+=()\'] are not allowed in bot names, as they leave you vulnerable to SQL injection attacks.")
        #Set up the bot's logger

        #Create a logger with the bot's name, and set the level to INFO.
        self.logger = logging.getLogger(self.name)
        self.logger.setLevel(logging.INFO)

        #Format our logs.
        formatter = logging.Formatter("%(name)s:%(asctime)s:%(levelname)s:%(message)s")

        #Create a stream handler which logs to the console.
        stream_handler = logging.StreamHandler()

        stream_handler.setFormatter(formatter)

        self.logger.addHandler(stream_handler)

        #If a path is supplied, create the specified filehandler to print to the log file.
        if self.__dict__.get("logpath") is not None:

            file_handler = logging.FileHandler(filename=self.logpath)

            file_handler.setFormatter(formatter)

            self.logger.addHandler(file_handler)

        #If Google Drive folders are provided, create an autheticated PyDrive object.
        if self.__dict__.get("drive_folders") is not None:

            self.logger.info("Connecting to Google Drive...")
            
            #Used for Google Drive.
            from pydrive2.auth import GoogleAuth
            from pydrive2.drive import GoogleDrive

            self.drive = GoogleDrive(GoogleAuth(settings_file=self.gdrive_settings))

        #Connect to our SQLite DB.
        self.connection = sqlite3.connect(self.db)

        #Create a cursor to execute commands within the database.
        self.cursor = self.connection.cursor()

    #This function updates and/or initializes our database.
    def updatedb(self):

        self.logger.info("Updating database...")

        #Create our database table if it doesn't exist.
        self.cursor.execute("""CREATE TABLE IF NOT EXISTS '{}' (
        ID text PRIMARY KEY,
        NAME text NOT NULL,
        LOCATION text NOT NULL,
        POSTED BOOLEAN NOT NULL);""".format(self.name))
       
        #Query a list of IDs from the database.
        self.cursor.execute("SELECT ID FROM '{}'".format(self.name))

        #Get the result of the query.
        db = [row[0] for row in self.cursor.fetchall()]

        #Iterate through all the files in the Google Drive folder.

        with self.connection:

            #If we provide Google Drive folders, scan for changes.
            if self.__dict__.get("drive_folders") is not None:

                for folder_id in self.drive_folders:

                    self.logger.info("Updating Drive folder {}...".format(folder_id))

                    #Get a list of files in the folder.
                    drive_files = self.drive.ListFile({'q': "'{}' in parents and trashed=false".format(folder_id)}).GetList()

                    for file in drive_files:

                        #If the file EXISTS in our database.
                        if (file['id'] in db):
                        
                            #Make sure that the correct filename is reflected in our database.
                            
                            self.cursor.execute("SELECT NAME FROM '{}' WHERE ID=:ID".format(self.name),
                            {"ID":file['id']})

                            #The above query should only return one result, so we canuse fetchone.
                            name = self.cursor.fetchone()[0]

                            #If the name has been changed, update our database.
                            if file['title'] != name:
                                self.cursor.execute("UPDATE '{}' SET NAME=:NAME WHERE ID=:ID".format(self.name),
                                {"NAME":file['title'],"ID":file['id']})
                                self.logger.info("RENAMED: {} TO {}".format(name, file['title']))
                    
                            #If the file exists in our database, remove it from the queried list.
                            #By doing this, we will list of files that are in our database but not in the Google Drive folder.
                            db.remove(file['id'])
                        
                        #If the file DOES NOT exist in our database, update the database with information for the new file.
                        else:

                            self.cursor.execute("INSERT INTO '{}' VALUES (:ID, :NAME, 'DRIVE', 'POSTED'=False)".format(self.name),
                            {"ID":file['id'], "NAME":file['title']})

                            #Output some useful information for logging purposes.
                            self.logger.info("ADDED: {} ({})".format(file['title'], file['id']))
                            
            #If we provide local folders, scan them for changes.
            if self.__dict__.get("local_folders") is not None:

                import os

                for folder in self.local_folders:

                    for file in os.listdir(folder):

                        #The itelligantly joins the folder path and the file name into a filepath. It will use the right structure based on the OS the bot is running on.
                        id = os.path.join(folder,file)

                        #If the file exists in our database, remove it from the queried list.
                        #By doing this, we will list of files that are in our database but not in the local folder.
                        if id in db:

                            db.remove(id)

                        else:

                            #If the file is not in our database, we add it to the database
                            #We use the file path as the ID for local files as the file path must be unique.
                            self.cursor.execute("INSERT INTO '{}' VALUES (:ID, :NAME, 'LOCAL', 'POSTED'=False)".format(self.name),
                            {"ID":id, "NAME":file})

                            self.logger.info("ADDED: {} ({})".format(file, id))

            #The remainder of the entires in the db array are files that exist in our database, but do not exist in any of the provided folders.
            #We will remove these entries from our database.
            for file in db:

                #Grab the name of the file we are about to delete, we want this for logging purposes.
                self.cursor.execute("SELECT NAME FROM '{}' WHERE ID=:ID".format(self.name),
                {"ID":file})
                
                name = self.cursor.fetchone()

                self.cursor.execute("DELETE FROM '{}' WHERE ID=:ID".format(self.name),
                {"ID":file})
            
                self.logger.info("DELETED: {} ({})".format(name, file))

        self.logger.info("Database is up to date!")

    #This function sets the "Posted" value to false for all memebers in our database.
    def resetdb(self):

        #Set the posted on all entries to FALSE
            with self.connection:
                self.logger.info("Restting database...")
                self.cursor.execute("UPDATE '{}' SET POSTED=FALSE".format(self.name))
                self.logger.info("Database Reset!")

    #This function downloads an image from Google Drive and returns a BytesIO object whith some special fields added.
    def DownloadFromDrive(self, id):
        
        from io import BytesIO

        self.logger.info("Fetching data from Google Drive...")

        #Create a Google Drive file object of the media.
        file = self.drive.CreateFile({'id': id})
        
        self.logger.info("DOWNLOADING: {} ({})".format(file['title'],file['id']))

        #We will now write the data from the media to a buffer (BytesIO).
        self.logger.info("Writing data to buffer...")
        
        io = BytesIO()

        #GetContentIOBuffer() returns a interable which we write a chunk of data at a time to a buffer.
        for chunk in file.GetContentIOBuffer():
           io.write(chunk)

        #Make sure that we set the seek to the beginning so our progrma starts reading the buffer from the front.
        io.seek(0)
        
        media= Media(io,file['mimeType'],file['title'],id)
        #Add some useful information to the object

        #Return the media object.
        return media

    #The function downloads a random file from our database.
    def GetRandom(self,no_repeat=True):
        
        import random
    
        self.logger.info("Selecting media from database...")

        #If we don't care about repeats, then just grab an entry.
        if no_repeat == False:
             self.cursor.execute("SELECT * FROM '{}'".format(self.name))
             db = self.cursor.fetchall()

        else:
            #Grab a list of files that haven't been posted from the database.
            self.cursor.execute("SELECT * FROM '{}' WHERE POSTED=FALSE".format(self.name))
            db = self.cursor.fetchall()
        
            #If all the files have been posted, reset the database.
            if db == []:
                self.logger.info("Have posted all photos!")
                
                self.resetdb()
                
                #Get an array of all members of the database.
                self.cursor.execute("SELECT * FROM '{}' WHERE POSTED=FALSE".format(self.name))
                db = self.cursor.fetchall()
                self.logger.info("Selecting media from database...")

        #Choose a random row in our database.
        row = random.choice(db)
        self.logger.info("Selected {} ({})!".format(row[1], row[0]))

        #return a media object downloaded from Drive.
        if row[2] == "DRIVE":
            return self.DownloadFromDrive(row[0])

        #If it is a local file, then return a media object made out of the file.
        elif row[2] == "LOCAL":
            import mimetypes
            return Media(open(row[0],"rb"), mimetypes.guess_type(row[0])[0],row[1],row[0])
Ejemplo n.º 22
0
def search_folder(folderid: str):
    """List every Drive file whose parent folder has the given id."""
    query = f"'{folderid}' in parents"
    client = GoogleDrive(gauth)
    return client.ListFile({'q': query}).GetList()
Ejemplo n.º 23
0
class Drive(object):
    """Small wrapper around an authenticated pydrive2 GoogleDrive session."""

    def __init__(self):
        self.logger = logging.getLogger(__name__)
        self.login()

    def login(self):
        """Authenticate with Google Drive, reusing cached credentials when possible."""
        self.gauth = GoogleAuth()

        # Try to reuse previously saved credentials.
        self.gauth.LoadCredentialsFile("mycreds.txt")
        self.logger.debug("Loading credentials")

        if self.gauth.credentials is None:
            # No saved credentials: run the interactive browser flow.
            self.logger.debug("First authentication")
            self.gauth.LocalWebserverAuth()
        elif self.gauth.access_token_expired:
            # Saved credentials exist but the access token is stale.
            self.logger.debug("Token has expired, refreshing")
            self.gauth.Refresh()
        else:
            # Saved credentials are still valid; just initialize them.
            self.logger.debug("Connected")
            self.gauth.Authorize()

        # Persist the (possibly refreshed) credentials for the next run.
        self.gauth.SaveCredentialsFile("mycreds.txt")

        # Create GoogleDrive instance with authenticated GoogleAuth instance.
        self.drive = GoogleDrive(self.gauth)

    def find_folders(self, folder_name):
        """Return non-trashed Drive folders whose title matches folder_name."""
        query = (
            "title='{}' and mimeType contains "
            "'application/vnd.google-apps.folder' and trashed=false"
        ).format(folder_name)
        return self.drive.ListFile({"q": query}).GetList()

    def create_subfolder(self, folder, sub_folder_name):
        """Create a Drive folder, optionally parented under `folder`."""
        metadata = {
            "title": "{}".format(sub_folder_name),
            "mimeType": "application/vnd.google-apps.folder",
        }
        new_folder = self.drive.CreateFile(metadata)
        if folder is not None:
            new_folder["parents"] = [{u"id": folder["id"]}]
        new_folder.Upload()
        self.logger.debug("Folder created {}/{}".format(
            folder, sub_folder_name))
        return new_folder

    def upload_files_to_folder(self, fnames, folder):
        """Upload each local file in `fnames` into the given Drive folder."""
        for fname in fnames:
            drive_file = self.drive.CreateFile({
                "title": os.path.basename(fname),
                "parents": [{u"id": folder["id"]}],
            })
            drive_file.SetContentFile(fname)
            drive_file.Upload()
Ejemplo n.º 24
0
gauth.CommandLineAuth()
drive = GoogleDrive(gauth)

# If provided arguments incorrect, print usage instructions and exit.
# Two arguments are required (folder ID and file path), so argv must have
# length 3 — the previous `< 2` check let a missing file path raise IndexError.
if len(sys.argv) < 3:
    print("usage: upload.py <Google Drive folder ID> <local file path>")
    exit(1)  # Exit program as incorrect parameters provided.

parentId = sys.argv[1]
myFilePath = sys.argv[2]
myFileName = path.basename(sys.argv[2])

# Check if file name already exists in folder.
file_list = drive.ListFile({
    "q":
    '"{}" in parents and title="{}" and trashed=false'.format(
        parentId, myFileName)
}).GetList()

# If file is found, update it, otherwise create new file.
if len(file_list) == 1:
    myFile = file_list[0]
else:
    myFile = drive.CreateFile(
        {"parents": [{
            "kind": "drive#fileLink",
            "id": parentId
        }]})

# Upload new file content. SetContentFile only stages the local file;
# Upload() performs the actual transfer to Drive.
myFile.SetContentFile(myFilePath)
myFile.Upload()
Ejemplo n.º 25
0
class GDriveCommands(object):
    """Access google drive with methods

    Setup
    =====
    # Initialize the object and authenticate
    g = GDriveCommands()

    List root directory
    ===================
    g.ls_root() -> List[pydrive2.files.GoogleDriveFile]
    g.ls() -> List[pydrive2.files.GoogleDriveFile]

    Access Files
    ============
    # These functions can all optionally start with a google drive directory object
    # (GoogleDriveFile with mimeType=application/vnd.google-apps.folder). They start
    # from the root otherwise.
    g.find(GDRIVE_DIRECTORY, *path_elements) -> GDRIVE_FILE/GDRIVE_DIRECTORY
    g.find(*path_elements) -> GDRIVE_FILE/GDRIVE_DIRECTORY
    g.ls(GDRIVE_DIRECTORY, *path_elements) -> list of GDRIVE_FILEs
    g.ls(*path_elements) -> list of GDRIVE_FILEs
    g.exists(GDRIVE_DIRECTORY, *path_elements) -> bool
    g.exists(*path_elements) -> bool

    Download Files
    ==============
    g.download_file(GDRIVE_FILE, local_path, overwrite=g.Overwrite.NEVER)
    g.download_files([GDRIVE_FILE1, GDRIVE_FILE2, ...], local_folder_path, overwrite=g.Overwrite.NEVER)
    g.download_folder(GDRIVE_DIRECTORY, local_folder_path)

    Overwrite Modes
    ===============
    g.Overwrite.NEVER
    g.Overwrite.ALWAYS
    g.Overwrite.ON_FILESIZE_CHANGE
    g.Overwrite.ON_MD5_CHECKSUM_CHANGE

    Upload Files/Create Folders
    ===========================
    g.create_folder(GDRIVE_DIRECTORY, folder_name)
    g.upload_file(local_file_path, GDRIVE_DIRECTORY)
    """
    class Overwrite(enum.Enum):
        """Policies deciding whether an existing file may be replaced."""
        NEVER = 0
        ALWAYS = 1
        ON_FILESIZE_CHANGE = 2
        ON_MD5_CHECKSUM_CHANGE = 3

    def __init__(self, settings_file="settings.yaml", log_level=logging.INFO):
        """Authenticate against Google Drive and set up logging.

        Params
        settings_file: path to the pydrive2 settings.yaml describing credentials
        log_level: logging level applied to this instance's logger
        """
        self.drive = GoogleDrive(self._get_auth(settings_file))

        self.logger = logging.getLogger("gdrive_access.access.GDriveCommands")
        self.logger.setLevel(log_level)

    def _split_root_and_path(self, *path):
        """Split a list of path elements into the root and string path

        Returns a tuple of (root: GoogleDriveFile, path: List[str])
        """
        if not len(path) or isinstance(path[0], str):
            return RootDrive(), path

        if not isinstance(path[0], GoogleDriveFile):
            raise ValueError(
                "The path must either be all strings or start with a GoogleDriveFile"
            )

        return path[0], path[1:]

    def get_root(self, folder_name: str, shared: bool = False):
        """Get the root directory to start queries from

        Params
        ======
        folder_name: Name of folder to search for
        shared (default=False): If true, only searches for folders in "Shared with Me"

        Returns:
            pydrive.GoogleDriveFile object of the root location
        """
        if shared is False:
            result_list = PyDriveListWrapper(
                self.drive.ListFile({
                    "q": "title = '{}'".format(folder_name)
                }).GetList())
        else:
            result_list = PyDriveListWrapper(
                self.drive.ListFile({
                    "q":
                    "title = '{}' and sharedWithMe".format(folder_name)
                }).GetList())

        if len(result_list) > 1:
            # Use the instance logger for consistency with the rest of the
            # class (a bare module-level `logger` may not be defined).
            self.logger.warning(
                "Located {} files by name {}. Selecting the first one".format(
                    len(result_list), folder_name))
        elif len(result_list) == 0:
            raise NotFoundError(
                "{}older '{}' not found. Use ls() or "
                "ls_root() to see potential top level folders".format(
                    "Shared f" if shared else "F", folder_name))

        return result_list[0]

    def _get_auth(self, settings_file="settings.yaml"):
        """Load pydrive2 auth from settings_file, raising CredentialsNotFound
        with setup instructions when the settings or credentials are missing."""
        if not os.path.exists(settings_file):
            raise CredentialsNotFound(
                "Settings file {} was not found.\n"
                "Please fix the path to the settings.yaml file or set up credentials with\n"
                "'python -m gdrive_access.setup_credentials --dir CREDENTIALDIR'"
                .format(settings_file))
        try:
            return get_auth(settings_file)
        except FileNotFoundError:
            raise CredentialsNotFound(
                "Credentials were not found; Perhaps you should try running\n"
                "'python -m gdrive_access.setup_credentials --dir CREDENTIALDIR'\n"
                "or fixing the location of the credentials location set in {}".
                format(settings_file))

    def _to_id(self, file_: GoogleDriveFile):
        """Return the id of the object unless it is a shortcut, then find the true id.
        """
        if file_["mimeType"] == "application/vnd.google-apps.shortcut":
            file_.FetchMetadata(fields="shortcutDetails")
            return file_["shortcutDetails"]["targetId"]
        else:
            return file_["id"]

    def _check_if_overwrite_okay(self, overwrite: Overwrite,
                                 gdrive_file: GoogleDriveFile,
                                 download_to_path: str):
        """Return True if `overwrite` policy allows replacing the local file."""
        if overwrite is self.Overwrite.NEVER:
            return False
        elif overwrite is self.Overwrite.ALWAYS:
            return True
        elif overwrite is self.Overwrite.ON_FILESIZE_CHANGE:
            local_filesize = os.path.getsize(download_to_path)
            # Drive reports fileSize as a string; convert before comparing with
            # the int from os.path.getsize — a str/int compare is always unequal,
            # which made this mode behave like ALWAYS.
            gdrive_filesize = int(gdrive_file.metadata["fileSize"])
            return local_filesize != gdrive_filesize
        elif overwrite is self.Overwrite.ON_MD5_CHECKSUM_CHANGE:
            local_checksum = _md5(download_to_path)
            gdrive_checksum = gdrive_file.metadata["md5Checksum"]
            return local_checksum != gdrive_checksum

    def _find_one_level(self, dir: GoogleDriveFile, filename: str):
        """Look for a filename in google drive directory

        Params
        dir: a GoogleDriveFile representing a folder to locate the file in
        filename: the str name of the file or directory to look for

        Returns:
            pydrive.GoogleDriveFile object representing the file being searched for

        Raises:
            NotFoundError if no match exists, MultipleFilesError if several do.
        """
        time.sleep(0.01)  # brief pause to avoid hammering the API
        file_list = PyDriveListWrapper(
            self.drive.ListFile({
                "q":
                "title = '{}' and '{}' in parents and "
                "trashed = false".format(filename, self._to_id(dir))
            }).GetList())

        if not len(file_list):
            raise NotFoundError("{}/{} not found".format(
                dir["title"], filename))

        if len(file_list) > 1:
            raise MultipleFilesError

        return file_list[0]

    def find(self, *path):
        """Get for a specific path in google drive directory

        Params
        *path: each individual path element. The first one can optionally be a
            GoogleDriveFile representing a directory to start from

        Returns:
            pydrive.GoogleDriveFile object representing the file being searched for
        """
        root, path = self._split_root_and_path(*path)
        result = root
        for path_element in path:
            result = self._find_one_level(result, path_element)
        return result

    def ls(self, *path):
        """List the non-trashed contents of the directory at *path."""
        id_ = self._to_id(self.find(*path))
        time.sleep(0.01)
        return PyDriveListWrapper(
            self.drive.ListFile({
                "q":
                "'{}' in parents and trashed = false".format(id_)
            }).GetList())

    def ls_root(self):
        """List the contents of the root directory."""
        return self.ls()

    def exists(self, *path):
        """Return True if the given path resolves to a file or folder."""
        try:
            self.find(*path)
        except NotFoundError:
            return False
        else:
            return True

    def download_file(self,
                      gdrive_file,
                      download_to_path,
                      overwrite: Overwrite = Overwrite.NEVER):
        """Download a file from google drive

        Params
        gdrive_file (pydrive file): file to download
        download_to_path (str): location on local filesystem to download data
        overwrite (GDriveCommands.Overwrite, default=NEVER): Overwrite mode
        """
        # Folders are delegated to download_folder, which recurses.
        if gdrive_file["mimeType"] == "application/vnd.google-apps.folder":
            return self.download_folder(gdrive_file,
                                        download_to_path,
                                        overwrite=overwrite)

        if os.path.isdir(download_to_path):
            download_to_path = os.path.join(download_to_path,
                                            gdrive_file["title"])
        if os.path.exists(
                download_to_path) and not self._check_if_overwrite_okay(
                    overwrite, gdrive_file, download_to_path):
            return

        time.sleep(0.01)
        gdrive_file.GetContentFile(download_to_path)

    def create_folder(self, create_in, folder_name, return_if_exists=True):
        """Create a folder in google drive

        Params
        create_in (pydrive object): the folder to create a new folder in on google drive (e.g. the output
            of create_folder() or find())
        folder_name (string): the name of the new folder to create
        return_if_exists (bool, default True): return the existing folder if it already exists on google
            drive. If set to False, will raise an error if the folder already exists.
        """
        if self.exists(create_in, folder_name):
            if return_if_exists:
                return self.find(create_in, folder_name)
            else:
                raise FolderExists("Folder already exists")

        new_folder = self.drive.CreateFile({
            "title":
            folder_name,
            "parents": [{
                "id": self._to_id(create_in)
            }],
            "mimeType":
            "application/vnd.google-apps.folder"
        })
        time.sleep(0.01)
        new_folder.Upload()
        return new_folder

    def upload_file(self,
                    local_file_path,
                    upload_to,
                    uploaded_name=None,
                    overwrite: Overwrite = Overwrite.ON_MD5_CHECKSUM_CHANGE):
        """Uploads a file to a gdrive folder

        Params
        local_file_path (string): the path to the file on your computer
        upload_to (pydrive object): pydrive folder object (e.g. the output of create_folder() or find())
        uploaded_name (string, optional): name to call the uploaded file in google drive. Uses the files actual
            name if left as None. Note that google drive allows for multiple files with the same name!
        overwrite (GDriveCommands.Overwrite, default=ON_MD5_CHECKSUM_CHANGE): set to True to upload it no matter what.
            If false, won't upload if a file by the same name already exists on google drive.
            (Note that google drive allows for multiple files of the same name, so it wont actually overwrite
            even if it is set)

        Raises:
            FileExists when a same-named remote file exists and the overwrite
            policy forbids replacing it.
        """
        if uploaded_name is None:
            filename = os.path.basename(local_file_path)
        else:
            filename = uploaded_name

        self.logger.info("Uploading {} to {}".format(local_file_path,
                                                     upload_to["title"]))

        if self.exists(upload_to, filename):
            existing_file = self.find(upload_to, filename)
            if not self._check_if_overwrite_okay(overwrite, existing_file,
                                                 local_file_path):
                self.logger.info("{} already exists at {}".format(
                    local_file_path, upload_to["title"]))
                raise FileExists(
                    "File already exists on google drive, can't overwrite with overwrite={}"
                    .format(overwrite))

        new_file = self.drive.CreateFile({
            "parents": [{
                "id": self._to_id(upload_to)
            }],
            "title":
            uploaded_name or filename,
        })
        new_file.SetContentFile(local_file_path)
        time.sleep(0.01)
        new_file.Upload()
        self.logger.info("Uploaded {} to {}".format(local_file_path,
                                                    upload_to["title"]))

    def upload_folder(
            self,
            local_folder_path,
            upload_to,
            uploaded_name=None,
            overwrite_file: Overwrite = Overwrite.ON_MD5_CHECKSUM_CHANGE,
            overwrite_folder=False):
        """Upload a local folder and its contents to google drive

        Attempts to preserve folder structure. overwrite_folder False will not attempt to write at a folder that exists
        """
        if uploaded_name is None:
            foldername = os.path.basename(local_folder_path)
        else:
            foldername = uploaded_name

        gdrive_folder = self.create_folder(upload_to,
                                           foldername,
                                           return_if_exists=overwrite_folder)

        for content in glob.glob(os.path.join(local_folder_path, "*")):
            if os.path.isdir(content):
                # Propagate the caller's overwrite settings down the recursion
                # (previously the defaults silently replaced them).
                self.upload_folder(content,
                                   gdrive_folder,
                                   overwrite_file=overwrite_file,
                                   overwrite_folder=overwrite_folder)
            else:
                try:
                    self.upload_file(content,
                                     gdrive_folder,
                                     overwrite=overwrite_file)
                except FileExists:
                    pass

    def download_files(self,
                       gdrive_files,
                       download_to_path,
                       overwrite: Overwrite = Overwrite.NEVER):
        """Download files from google drive

        Params
        gdrive_files (list of pydrive files): files to download (e.g. the output from ls() or
            a list of ouputs from find())
        download_to_path (str): location on local filesystem to download data
        overwrite (GDriveCommands.Overwrite, default=NEVER): Overwrite mode
        """
        if not os.path.exists(download_to_path):
            os.makedirs(download_to_path)

        if not os.path.isdir(download_to_path):
            raise Exception("download_to_path must be an existing directory")

        for file in gdrive_files:
            self.download_file(file, download_to_path, overwrite=overwrite)

    def download_folder(self,
                        gdrive_folder,
                        download_to_path,
                        overwrite: Overwrite = Overwrite.NEVER):
        """Download files from google drive

        Params
        gdrive_files (list of pydrive files): files to download
        download_to_path (str): location on local filesystem to download data
        """
        # Mirror the remote folder name locally and recurse into it.
        download_to_path = os.path.join(download_to_path,
                                        gdrive_folder["title"])
        if not os.path.exists(download_to_path):
            os.makedirs(download_to_path)

        for f in self.ls(gdrive_folder):
            time.sleep(0.01)
            if f["mimeType"] == "application/vnd.google-apps.folder":
                self.download_folder(f, download_to_path, overwrite=overwrite)
            else:
                self.download_file(f,
                                   os.path.join(download_to_path, f["title"]),
                                   overwrite=overwrite)
Ejemplo n.º 26
0
class GDriveFileSystem(AbstractFileSystem):
    """Filesystem-style interface over Google Drive via a pydrive2 client.

    Implements the AbstractFileSystem interface (ls/find/info/open/rm, etc.).
    Paths have the form "<root>/<base>" where <root> is a Drive root id
    (e.g. "root", "appDataFolder", or a folder/shared-drive id — see
    _list_params) and <base> is a posix-style path beneath it.  Item ids
    are cached (see _ids_cache) to reduce API round-trips.
    """
    def __init__(self, path, google_auth, trash_only=True, **kwargs):
        # path: "<root>/<base>" string locating the remote directory.
        # google_auth: authenticated pydrive2 GoogleAuth object.
        # trash_only: when True, rm_file moves items to Trash rather than
        #   permanently deleting them (see gdrive_delete_file).
        self.path = path
        self.root, self.base = self.split_path(self.path)
        self.client = GoogleDrive(google_auth)
        self._trash_only = trash_only
        super().__init__(**kwargs)

    def split_path(self, path):
        """Split "<root>/<rest>" into (root, rest); rest is "" when absent."""
        parts = path.replace("//", "/").rstrip("/").split("/", 1)
        if len(parts) == 2:
            return parts
        else:
            return parts[0], ""

    @wrap_prop(threading.RLock())
    @cached_property
    def _ids_cache(self):
        """Lazily-built, lock-protected cache of path <-> Drive-id mappings.

        "dirs" maps a path to the list of item ids seen at that path (Drive
        permits duplicate titles), "ids" maps an item id back to its path,
        and "root_id" is the id of self.path itself.
        """
        cache = {
            "dirs":
            defaultdict(list),
            "ids": {},
            "root_id":
            self._get_item_id(
                self.path,
                use_cache=False,
                hint="Confirm the directory exists and you can access it.",
            ),
        }

        self._cache_path_id(self.base, cache["root_id"], cache=cache)

        # Pre-populate with the immediate (non-trashed) children of the root.
        for item in self._gdrive_list(
                "'{}' in parents and trashed=false".format(cache["root_id"])):
            item_path = posixpath.join(self.base, item["title"])
            self._cache_path_id(item_path, item["id"], cache=cache)

        return cache

    def _cache_path_id(self, path, *item_ids, cache=None):
        """Record path <-> id mappings in the given cache (default: _ids_cache)."""
        cache = cache or self._ids_cache
        for item_id in item_ids:
            cache["dirs"][path].append(item_id)
            cache["ids"][item_id] = path

    @cached_property
    def _list_params(self):
        """Base params for ListFile; adds driveId/corpora for shared drives."""
        params = {"corpora": "default"}
        # Anything other than the personal roots may be a shared drive.
        if self.root != "root" and self.root != "appDataFolder":
            drive_id = self._gdrive_shared_drive_id(self.root)
            if drive_id:
                logger.debug(
                    "GDrive remote '{}' is using shared drive id '{}'.".format(
                        self.path, drive_id))
                params["driveId"] = drive_id
                params["corpora"] = "drive"
        return params

    @_gdrive_retry
    def _gdrive_shared_drive_id(self, item_id):
        """Return the shared-drive id holding item_id, or None if not shared.

        Raises PermissionError when the item cannot be fetched (HTTP 404).
        """
        from pydrive2.files import ApiRequestError

        param = {"id": item_id}
        # it does not create a file on the remote
        item = self.client.CreateFile(param)
        # ID of the shared drive the item resides in.
        # Only populated for items in shared drives.
        try:
            item.FetchMetadata("driveId")
        except ApiRequestError as exc:
            error_code = exc.error.get("code", 0)
            if error_code == 404:
                raise PermissionError from exc
            raise

        return item.get("driveId", None)

    def _gdrive_list(self, query):
        """Run a Drive list query, lazily yielding items across all pages."""
        param = {"q": query, "maxResults": 1000}
        param.update(self._list_params)
        file_list = self.client.ListFile(param)

        # Isolate and decorate fetching of remote drive items in pages.
        get_list = _gdrive_retry(lambda: next(file_list, None))

        # Fetch pages until None is received, lazily flatten the thing.
        return cat(iter(get_list, None))

    def _gdrive_list_ids(self, query_ids):
        """List non-trashed items whose parent is any id in query_ids."""
        query = " or ".join(f"'{query_id}' in parents"
                            for query_id in query_ids)
        query = f"({query}) and trashed=false"
        return self._gdrive_list(query)

    def _get_remote_item_ids(self, parent_ids, title):
        """Query Drive for ids of items named `title` under any of parent_ids."""
        if not parent_ids:
            return None
        query = "trashed=false and ({})".format(" or ".join(
            f"'{parent_id}' in parents" for parent_id in parent_ids))
        query += " and title='{}'".format(title.replace("'", "\\'"))

        # GDrive list API is case insensitive, we need to compare
        # all results and pick the ones with the right title
        return [
            item["id"] for item in self._gdrive_list(query)
            if item["title"] == title
        ]

    def _get_cached_item_ids(self, path, use_cache):
        """Return cached ids for path; [] forces a remote lookup upstream."""
        if not path:
            # Empty path means the filesystem root itself.
            return [self.root]
        if use_cache:
            return self._ids_cache["dirs"].get(path, [])
        return []

    def _path_to_item_ids(self, path, create=False, use_cache=True):
        """Resolve a path to Drive item ids, recursing through parents.

        When create=True, missing directories are created along the way.
        Returns [] when the path does not exist and create is False.
        """
        item_ids = self._get_cached_item_ids(path, use_cache)
        if item_ids:
            return item_ids

        # Resolve the parent first, then look the final component up remotely.
        parent_path, title = posixpath.split(path)
        parent_ids = self._path_to_item_ids(parent_path, create, use_cache)
        item_ids = self._get_remote_item_ids(parent_ids, title)
        if item_ids:
            return item_ids

        return ([self._create_dir(min(parent_ids), title, path)]
                if create else [])

    def _get_item_id(self, path, create=False, use_cache=True, hint=None):
        """Resolve path to a single item id (the minimum of any duplicates).

        Raises FileNotFoundError (with optional hint text) when unresolved.
        """
        bucket, base = self.split_path(path)
        assert bucket == self.root

        item_ids = self._path_to_item_ids(base, create, use_cache)
        if item_ids:
            # min() gives a deterministic choice among duplicate-named items.
            return min(item_ids)

        assert not create
        raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), hint
                                or path)

    @_gdrive_retry
    def _gdrive_create_dir(self, parent_id, title):
        """Create a Drive folder named title under parent_id; return the item."""
        parent = {"id": parent_id}
        item = self.client.CreateFile({
            "title": title,
            "parents": [parent],
            "mimeType": FOLDER_MIME_TYPE
        })
        item.Upload()
        return item

    @wrap_with(threading.RLock())
    def _create_dir(self, parent_id, title, remote_path):
        """Create remote_path (unless cached) and return its item id."""
        cached = self._ids_cache["dirs"].get(remote_path)
        if cached:
            return cached[0]

        item = self._gdrive_create_dir(parent_id, title)

        # Only top-level children of the root are tracked in the cache.
        if parent_id == self._ids_cache["root_id"]:
            self._cache_path_id(remote_path, item["id"])

        return item["id"]

    def exists(self, path):
        """Return True if path resolves to an existing Drive item."""
        try:
            self._get_item_id(path)
        except FileNotFoundError:
            return False
        else:
            return True

    @_gdrive_retry
    def info(self, path):
        """Return a metadata dict (name/type/size[/checksum]) for path."""
        bucket, base = self.split_path(path)
        item_id = self._get_item_id(path)
        gdrive_file = self.client.CreateFile({"id": item_id})
        gdrive_file.FetchMetadata()

        metadata = {"name": posixpath.join(bucket, base.rstrip("/"))}
        if gdrive_file["mimeType"] == FOLDER_MIME_TYPE:
            # Directories are reported with size 0 and a trailing slash.
            metadata["type"] = "directory"
            metadata["size"] = 0
            metadata["name"] += "/"
        else:
            metadata["type"] = "file"
            metadata["size"] = int(gdrive_file.get("fileSize"))
            metadata["checksum"] = gdrive_file["md5Checksum"]
        return metadata

    def ls(self, path, detail=False):
        """List path's children: metadata dicts if detail, else just names.

        Returns None when the path cannot be resolved.
        """
        bucket, base = self.split_path(path)

        cached = base in self._ids_cache["dirs"]
        if cached:
            dir_ids = self._ids_cache["dirs"][base]
        else:
            dir_ids = self._path_to_item_ids(base)

        if not dir_ids:
            return None

        root_path = posixpath.join(bucket, base)
        contents = []
        for item in self._gdrive_list_ids(dir_ids):
            item_path = posixpath.join(root_path, item["title"])
            if item["mimeType"] == FOLDER_MIME_TYPE:
                contents.append({
                    "type": "directory",
                    "name": item_path.rstrip("/") + "/",
                    "size": 0,
                })
            else:
                contents.append({
                    "type": "file",
                    "name": item_path,
                    "size": int(item["fileSize"]),
                    "checksum": item["md5Checksum"],
                })

        if not cached:
            self._cache_path_id(root_path, *dir_ids)

        if detail:
            return contents
        else:
            return [content["name"] for content in contents]

    def find(self, path, detail=False, **kwargs):
        """Recursively list all files under path (breadth-first over dirs).

        Returns {name: metadata} when detail, else a list of names.
        """
        bucket, base = self.split_path(path)

        seen_paths = set()
        # Work queue of {dir_id: dir_path} dicts still to be expanded.
        dir_ids = [self._ids_cache["ids"].copy()]
        contents = []
        while dir_ids:
            # Keep only unexpanded dirs that live under `base`.
            query_ids = {
                dir_id: dir_name
                for dir_id, dir_name in dir_ids.pop().items()
                if posixpath.commonpath([base, dir_name]) == base
                if dir_id not in seen_paths
            }
            if not query_ids:
                continue

            seen_paths |= query_ids.keys()

            new_query_ids = {}
            dir_ids.append(new_query_ids)
            for item in self._gdrive_list_ids(query_ids):
                parent_id = item["parents"][0]["id"]
                item_path = posixpath.join(query_ids[parent_id], item["title"])
                if item["mimeType"] == FOLDER_MIME_TYPE:
                    # Sub-directories are queued for the next round.
                    new_query_ids[item["id"]] = item_path
                    self._cache_path_id(item_path, item["id"])
                    continue

                contents.append({
                    "name": posixpath.join(bucket, item_path),
                    "type": "file",
                    "size": int(item["fileSize"]),
                    "checksum": item["md5Checksum"],
                })

        if detail:
            return {content["name"]: content for content in contents}
        else:
            return [content["name"] for content in contents]

    def upload_fobj(self, stream, rpath, callback=None, **kwargs):
        """Upload a readable file object to rpath, creating parent dirs."""
        parent_id = self._get_item_id(self._parent(rpath), create=True)
        if callback:
            # Report bytes read from the stream as upload progress.
            stream = CallbackIOWrapper(callback.relative_update, stream,
                                       "read")
        return self.gdrive_upload_fobj(posixpath.basename(rpath.rstrip("/")),
                                       parent_id, stream)

    def put_file(self, lpath, rpath, callback=None, **kwargs):
        """Upload local file lpath to remote rpath."""
        if callback:
            callback.set_size(os.path.getsize(lpath))
        with open(lpath, "rb") as stream:
            self.upload_fobj(stream, rpath, callback=callback)

    @_gdrive_retry
    def gdrive_upload_fobj(self, title, parent_id, stream, callback=None):
        """Create a Drive file named title under parent_id from stream."""
        item = self.client.CreateFile({
            "title": title,
            "parents": [{
                "id": parent_id
            }]
        })
        item.content = stream
        item.Upload()
        return item

    def cp_file(self, lpath, rpath, **kwargs):
        """In-memory streamed copy"""
        with self.open(lpath) as stream:
            # IterStream objects doesn't support full-length
            # seek() calls, so we have to wrap the data with
            # an external buffer.
            buffer = io.BytesIO(stream.read())
            self.upload_fobj(buffer, rpath)

    def get_file(self, lpath, rpath, callback=None, block_size=None, **kwargs):
        """Download remote file lpath to local path rpath.

        NOTE(review): argument naming is inverted relative to put_file —
        here `lpath` is the remote source and `rpath` the local target.
        """
        item_id = self._get_item_id(lpath)
        return self.gdrive_get_file(item_id,
                                    rpath,
                                    callback=callback,
                                    block_size=block_size)

    @_gdrive_retry
    def gdrive_get_file(self, item_id, rpath, callback=None, block_size=None):
        """Fetch item_id's content into local file rpath, with progress."""
        param = {"id": item_id}
        # it does not create a file on the remote
        gdrive_file = self.client.CreateFile(param)

        extra_args = {}
        if block_size:
            extra_args["chunksize"] = block_size

        if callback:

            def cb(value, _):
                callback.absolute_update(value)

            gdrive_file.FetchMetadata(fields="fileSize")
            callback.set_size(int(gdrive_file.get("fileSize")))
            extra_args["callback"] = cb

        gdrive_file.GetContentFile(rpath, **extra_args)

    def _open(self, path, mode, **kwargs):
        """Open path for reading ("rb") or writing ("wb")."""
        assert mode in {"rb", "wb"}
        if mode == "wb":
            return GDriveBufferedWriter(self, path)
        else:
            item_id = self._get_item_id(path)
            return self.gdrive_open_file(item_id)

    @_gdrive_retry
    def gdrive_open_file(self, item_id):
        """Return a streaming reader over item_id's content."""
        param = {"id": item_id}
        # it does not create a file on the remote
        gdrive_file = self.client.CreateFile(param)
        fd = gdrive_file.GetContentIOBuffer()
        return IterStream(iter(fd))

    def rm_file(self, path):
        """Remove path (trashed or deleted depending on trash_only)."""
        item_id = self._get_item_id(path)
        self.gdrive_delete_file(item_id)

    @_gdrive_retry
    def gdrive_delete_file(self, item_id):
        """Trash or permanently delete item_id per self._trash_only.

        Translates shared-drive permission failures (HTTP 403 on
        file.permissions) into a descriptive PermissionError.
        """
        from pydrive2.files import ApiRequestError

        param = {"id": item_id}
        # it does not create a file on the remote
        item = self.client.CreateFile(param)

        try:
            item.Trash() if self._trash_only else item.Delete()
        except ApiRequestError as exc:
            http_error_code = exc.error.get("code", 0)
            if (http_error_code == 403
                    and self._list_params["corpora"] == "drive"
                    and exc.GetField("location") == "file.permissions"):
                raise PermissionError(
                    "Insufficient permissions to {}. You should have {} "
                    "access level for the used shared drive. More details "
                    "at {}.".format(
                        "move the file into Trash"
                        if self._trash_only else "permanently delete the file",
                        "Manager or Content Manager"
                        if self._trash_only else "Manager",
                        "https://support.google.com/a/answer/7337554",
                    )) from exc
            raise
Ejemplo n.º 27
0
class pydrive2_interface:
    """Uploads versioned mtzs and logs to Google Drive via PyDrive2.

    Authenticates with a service-account credentials JSON file and uploads
    into (nested) folders beneath a fixed top-level Drive folder id.
    """
    def __init__(self, cred_file, folder_id):
        try:
            from pydrive2.auth import ServiceAccountCredentials, GoogleAuth
            from pydrive2.drive import GoogleDrive
        except ImportError:
            raise Sorry(
                "Pydrive2 not found. Try:\n$ conda install pydrive2 -c conda-forge"
            )
        auth = GoogleAuth()
        auth.credentials = ServiceAccountCredentials.from_json_keyfile_name(
            cred_file, ['https://www.googleapis.com/auth/drive'])
        self.drive = GoogleDrive(auth)
        self.top_folder_id = folder_id

    def _fetch_or_create_folder(self, fname, parent_id):
        # Look for an existing folder with this title under parent_id.
        hits = self.drive.ListFile({
            "q": "'{}' in parents and title='{}'".format(parent_id, fname),
            "supportsTeamDrives": "true",
            "includeItemsFromAllDrives": "true",
            "corpora": "allDrives"
        }).GetList()
        if hits:
            assert len(hits) == 1
            return hits[0]['id']
        # Not found: create the folder and return its new id.
        new_folder = self.drive.CreateFile({
            "title": fname,
            "mimeType": "application/vnd.google-apps.folder",
            "parents": [{
                "kind": "drive#fileLink",
                "id": parent_id
            }]
        })
        new_folder.Upload()
        return new_folder['id']

    def _upload_detail(self, file_path, parent_id):
        # Upload a single file, keeping its basename as the Drive title.
        remote = self.drive.CreateFile({
            "title": os.path.split(file_path)[1],
            "parents": [{
                "kind": "drive#fileLink",
                "id": parent_id
            }]
        })
        remote.SetContentFile(file_path)
        remote.Upload()

    def upload(self, folder_list, files):
        """Upload *files* into the nested folder named by *folder_list*.

        If folder_list is ['a', 'b'] and files is [f1, f2], nested folders
        a/b/ are created (if needed) under self.top_folder_id and f1 and f2
        are uploaded into the innermost folder.
        """
        parent = self.top_folder_id
        for fname in folder_list:
            parent = self._fetch_or_create_folder(fname, parent)
        for path in files:
            self._upload_detail(path, parent)
Ejemplo n.º 28
0
class GDriveServerNode(object):
    """ROS node exposing Google Drive upload services.

    Authenticates through a PyDrive2 settings YAML (``~settings_yaml``) and
    serves two ROS services: ``~upload`` (single file) and ``~upload_multi``
    (several files). Uploaded files are shared according to the ``~share_*``
    parameters.
    """
    folder_mime_type = 'application/vnd.google-apps.folder'
    folder_url_format = 'https://drive.google.com/drive/folders/{}'
    file_url_format = 'https://drive.google.com/uc?id={}'

    def __init__(self):
        settings_yaml = rospy.get_param('~settings_yaml', None)
        self.share_type = rospy.get_param('~share_type', 'anyone')
        self.share_value = rospy.get_param('~share_value', 'anyone')
        self.share_role = rospy.get_param('~share_role', 'reader')
        self.share_with_link = rospy.get_param('~share_with_link', True)
        auth_max_trial = rospy.get_param('~auth_max_trial', -1)
        auth_wait_seconds = rospy.get_param('~auth_wait_seconds', 10.0)
        if settings_yaml is not None:
            self.gauth = GoogleAuth(settings_yaml)
        else:
            rospy.logerr('param: ~settings_yaml is not correctly set.')
            sys.exit(1)

        rospy.loginfo('Google drive authentication starts.')
        auth_success = False
        auth_count = 0
        # Retry authentication; auth_max_trial < 0 means retry forever.
        while (not auth_success
               and (auth_max_trial < 0 or auth_count < auth_max_trial)):
            try:
                self.gauth.LocalWebserverAuth()
                auth_success = True
            except ServerNotFoundError as e:
                rospy.logerr('Authentication failed: {}'.format(e))
                auth_count = auth_count + 1
                time.sleep(auth_wait_seconds)
        if not auth_success:
            rospy.logerr(
                'Authentication failed {} times.'.format(auth_max_trial))
            sys.exit(1)
        self.gdrive = GoogleDrive(self.gauth)
        rospy.loginfo('Google drive authentication finished.')
        self.upload_server = rospy.Service('~upload', Upload, self._upload_cb)
        self.upload_multi_server = rospy.Service('~upload_multi',
                                                 MultipleUpload,
                                                 self._upload_multi_cb)
        rospy.loginfo('Finish initialization, Server started.')

    def _resolve_parents_id(self, parents_path, parents_id,
                            use_timestamp_folder, timestamp):
        """Resolve the destination folder id (shared by both upload callbacks).

        Returns ``(parents_id, success)``. On failure the error has already
        been logged and the caller should return its failure response.
        """
        if parents_id and parents_path:
            rospy.logerr('parents_path and parents_id is both set.')
            rospy.logerr(
                'parents_id: {} is selected to upload.'.format(parents_id))
            parents_path = ''

        if parents_path:
            try:
                parents_id = self._get_parents_id(parents_path, mkdir=True)
            except (ValueError, ApiRequestError, ServerNotFoundError) as e:
                rospy.logerr(e)
                rospy.logerr(
                    'Failed to get parents_id: {}'.format(parents_path))
                return parents_id, False
        # root
        elif parents_id == '' and parents_path == '':
            parents_id = ''

        if use_timestamp_folder:
            try:
                parents_id = self._get_parents_id([timestamp],
                                                  parents_id=parents_id,
                                                  mkdir=True)
            except (ValueError, ApiRequestError, ServerNotFoundError) as e:
                rospy.logerr(e)
                rospy.logerr('Failed to get parents_id: {} in {}'.format(
                    timestamp, self.folder_url_format.format(parents_id)))
                return parents_id, False
        return parents_id, True

    def _upload_cb(self, req):
        """Service callback: upload a single file and report its URL."""
        timestamp = '{0:%Y%m%d%H%M%S}'.format(datetime.datetime.now())

        # response initialization
        res = UploadResponse()
        res.success = False
        res.file_id = ''
        res.file_url = ''

        parents_id, ok = self._resolve_parents_id(req.parents_path,
                                                  req.parents_id,
                                                  req.use_timestamp_folder,
                                                  timestamp)
        if not ok:
            return res

        success, file_id, file_url = self._upload_step(
            req.file_path, req.file_title, parents_id,
            req.use_timestamp_file_title, timestamp)
        res.success = success
        res.file_id = file_id
        res.file_url = file_url
        res.parents_id = parents_id
        res.parents_url = self.folder_url_format.format(parents_id)
        return res

    def _upload_multi_cb(self, req):
        """Service callback: upload several files into one destination."""
        timestamp = '{0:%Y%m%d%H%M%S}'.format(datetime.datetime.now())

        # response initialization
        res = MultipleUploadResponse()
        res.successes = [False] * len(req.file_titles)
        res.file_ids = [''] * len(req.file_titles)
        res.file_urls = [''] * len(req.file_titles)

        parents_id, ok = self._resolve_parents_id(req.parents_path,
                                                  req.parents_id,
                                                  req.use_timestamp_folder,
                                                  timestamp)
        if not ok:
            return res

        for i, (file_path,
                file_title) in enumerate(zip(req.file_paths, req.file_titles)):
            success, file_id, file_url = self._upload_step(
                file_path, file_title, parents_id,
                req.use_timestamp_file_title, timestamp)
            res.successes[i] = success
            res.file_ids[i] = file_id
            res.file_urls[i] = file_url
        res.parents_id = parents_id
        res.parents_url = self.folder_url_format.format(parents_id)
        return res

    def _upload_step(self,
                     file_path,
                     file_title,
                     parents_id,
                     use_timestamp_file_title=False,
                     timestamp=None):
        """Upload one file; returns (success, file_id, file_url)."""
        # Fall back to the file's basename when no title was given.
        file_title = file_title if file_title else file_path.split('/')[-1]
        file_path = os.path.expanduser(file_path)
        if use_timestamp_file_title:
            file_title = '{}_{}'.format(timestamp, file_title)

        success = False
        file_id = ''
        file_url = ''
        folder_url = self.folder_url_format.format(parents_id)
        try:
            file_id = self._upload_file(file_path,
                                        file_title,
                                        parents_id=parents_id)
            file_url = self.file_url_format.format(file_id)
            success = True
            rospy.loginfo('Success to upload: {} -> {}'.format(
                file_path, file_url))
        except (OSError, ApiRequestError, ServerNotFoundError) as e:
            rospy.logerr(e)
            rospy.logerr('Failed to upload: {} -> {}'.format(
                file_path, folder_url))
        return success, file_id, file_url

    def _upload_file(self, file_path, file_title, parents_id=None):
        """Upload *file_path* to Drive, share it, and return its file id.

        Raises OSError when the local file does not exist.
        """
        if not os.path.exists(file_path):
            raise OSError('File not found: {}'.format(file_path))
        rospy.loginfo('Start uploading a file: {}'.format(file_title))
        if parents_id:
            gfile = self.gdrive.CreateFile({'parents': [{'id': parents_id}]})
        else:
            gfile = self.gdrive.CreateFile()
        gfile.SetContentFile(file_path)
        gfile['title'] = file_title
        gfile.Upload()
        # Apply sharing settings from the node's ~share_* parameters.
        gfile.InsertPermission({
            'type': self.share_type,
            'value': self.share_value,
            'role': self.share_role,
            'withLink': self.share_with_link,
        })
        rospy.loginfo('Finish uploading a file: {}'.format(file_title))
        return gfile['id']

    def _upload_folder(self, folder_title, parents_id=None):
        """Create a Drive folder and return its id."""
        rospy.loginfo('Start making a folder: {}'.format(folder_title))
        if parents_id:
            gfolder = self.gdrive.CreateFile({
                'title': folder_title,
                'parents': [{
                    'id': parents_id
                }],
                'mimeType': 'application/vnd.google-apps.folder'
            })
        else:
            gfolder = self.gdrive.CreateFile({
                'title': folder_title,
                'mimeType': 'application/vnd.google-apps.folder'
            })
        gfolder.Upload()
        rospy.loginfo('Finish making a folder: {}'.format(folder_title))
        return gfolder['id']

    def _get_parents_id(self, parents_path, parents_id=None, mkdir=False):
        """Recursively resolve (and optionally create) a nested folder path.

        *parents_path* may be a '/'-separated string or a list of folder
        titles. Returns the innermost folder's id, or None for an empty
        path. Raises ValueError when a folder is missing and mkdir is False.
        """
        if parents_path == '':
            return None

        if not isinstance(parents_path, list):
            parents_path = [p for p in parents_path.split('/') if p != '']

        folder_title = parents_path[0]
        parent = parents_id if parents_id else 'root'
        gfiles = self.gdrive.ListFile(
            {'q': "'{}' in parents and trashed=false".format(parent)})
        gfiles = gfiles.GetList()
        # Keep only non-trashed folders matching the wanted title.
        gfolders = []
        for gf in gfiles:
            if (gf['mimeType'] == self.folder_mime_type
                    and gf['title'] == folder_title):
                gfolders.append(gf)

        if len(parents_path) == 1:
            if len(gfolders) > 0:
                return gfolders[0]['id']
            if mkdir:
                folder_id = self._upload_folder(folder_title,
                                                parents_id=parents_id)
                return folder_id
            else:
                raise ValueError(
                    'Folder is not found: {}'.format(folder_title))
        else:
            if len(gfolders) > 0 or mkdir:
                if len(gfolders) > 0:
                    next_parents_id = gfolders[0]['id']
                elif mkdir:
                    next_parents_id = self._upload_folder(
                        folder_title, parents_id=parents_id)
                folder_id = self._get_parents_id(parents_path[1:],
                                                 parents_id=next_parents_id,
                                                 mkdir=mkdir)
                return folder_id
            else:
                # Bug fix: the original passed ('folder is not found: {}',
                # folder_title) as two arguments, so the message was never
                # formatted; now formatted consistently with the branch above.
                raise ValueError(
                    'Folder is not found: {}'.format(folder_title))
Ejemplo n.º 29
0
                  default=False,
                  action='store_true',
                  help='Overwrite mode (%default)')
# Parse command-line arguments; positional args are the file names to process.
(opts, args) = parser.parse_args()
if len(args) < 1:
    parser.print_help()
    sys.exit(0)
fnams = args

# Authenticate against Google Drive with a local-webserver OAuth flow.
gauth = GoogleAuth()
gauth.LocalWebserverAuth()
drive = GoogleDrive(gauth)

# Get Spatial-Information folder
# Expect exactly one non-trashed top-level folder whose title contains
# "Spatial-Information"; anything else is a fatal configuration error.
l = drive.ListFile({
    'q':
    '"root" in parents and trashed = false and mimeType = "application/vnd.google-apps.folder" and title contains "Spatial-Information"'
}).GetList()
if len(l) != 1:
    raise ValueError('Error in finding Spatial-Information folder')
folder_spatial_information = l[0]
# Get SENTINEL-1 folder
# Same uniqueness requirement for the "SENTINEL-1" folder nested inside it.
l = drive.ListFile({
    'q':
    '"{}" in parents and trashed = false and mimeType = "application/vnd.google-apps.folder" and title contains "SENTINEL-1"'
    .format(folder_spatial_information['id'])
}).GetList()
if len(l) != 1:
    raise ValueError('Error in finding SENTINEL-1 folder')
folder_sentinel_1 = l[0]
# Get GRD folder
l = drive.ListFile({