def pull(bucket, local_file, metadata_only=False):
    objname = os.path.basename(local_file)
    k = None
    try:
        k = JBoxGS.connect().objects().get(bucket=bucket, object=objname).execute()
    except HttpError as err:
        if err._get_reason() != 'Not Found':
            raise(err)
        else:
            return None
    if not metadata_only:
        req = JBoxGS.connect().objects().get_media(bucket=bucket, object=objname)
        fh = open(local_file, "wb")
        downloader = MediaIoBaseDownload(fh, req, chunksize=JBoxGS.CHUNK_SIZE * 1024 * 1024)
        done = False
        num_retries = 0
        while not done:
            try:
                _, done = downloader.next_chunk()
            except HttpError as err:
                num_retries += 1
                if num_retries > JBoxGS.MAX_RETRIES:
                    fh.close()
                    os.remove(local_file)
                    raise
                if err.resp.status in JBoxGS.RETRYABLE_ERRORS:
                    backoff = min(JBoxGS.BACKOFF_FACTOR ** (num_retries - 1), JBoxGS.MAX_BACKOFF)
                    sleep(backoff + random())
                else:
                    sleep(JBoxGS.SLEEP_TIME)
            except:
                # unexpected error: clean up the partial download and re-raise
                fh.close()
                os.remove(local_file)
                raise
def test_media_io_base_download(self):
    self.request.http = HttpMockSequence([
        ({'status': '200', 'content-range': '0-2/5'}, '123'),
        ({'status': '200', 'content-range': '3-4/5'}, '45'),
    ])
    self.assertEqual(True, self.request.http.follow_redirects)

    download = MediaIoBaseDownload(
        fd=self.fd, request=self.request, chunksize=3)

    self.assertEqual(self.fd, download._fd)
    self.assertEqual(3, download._chunksize)
    self.assertEqual(0, download._progress)
    self.assertEqual(None, download._total_size)
    self.assertEqual(False, download._done)
    self.assertEqual(self.request.uri, download._uri)

    status, done = download.next_chunk()

    self.assertEqual(self.fd.getvalue(), '123')
    self.assertEqual(False, done)
    self.assertEqual(3, download._progress)
    self.assertEqual(5, download._total_size)
    self.assertEqual(3, status.resumable_progress)

    status, done = download.next_chunk()

    self.assertEqual(self.fd.getvalue(), '12345')
    self.assertEqual(True, done)
    self.assertEqual(5, download._progress)
    self.assertEqual(5, download._total_size)
def download_file(service, item, download_folder='./data/', overwrite=False):
    if not is_downloadable(item):
        return False
    local_path = download_folder + item['path']
    if os.path.isfile(local_path) and not overwrite:
        if file_md5(local_path) == item['md5Checksum']:
            return False
        else:
            print("Corrupt file '%s'" % local_path)
    mkdir_p(os.path.dirname(local_path))
    with open(local_path, "wb") as destination:
        request = service.files().get_media(fileId=item['id'])
        downloader = MediaIoBaseDownload(destination, request)
        done = False
        while done is False:
            _, done = downloader.next_chunk()
    if file_md5(local_path) != item['md5Checksum']:
        raise Exception("Download for '%s' failed, wrong checksum" % local_path)
    return True
def get(service):
    # The user can be prompted (via raw_input) for the name of the file to be downloaded.
    file_name = raw_input("Enter file name to be downloaded:")
    try:
        # Get metadata
        req = service.objects().get(
            bucket=_BUCKET_NAME,
            object=file_name,
            fields='bucket,name,metadata(my-key)',
        )
        resp = req.execute()
        print json.dumps(resp, indent=2)

        # Get payload data
        req = service.objects().get_media(
            bucket=_BUCKET_NAME,
            object=file_name,
        )
        # The BytesIO object may be replaced with any io.Base instance.
        fh = io.BytesIO()
        downloader = MediaIoBaseDownload(fh, req, chunksize=1024 * 1024)

        # Show progress while downloading
        done = False
        while not done:
            status, done = downloader.next_chunk()
            if status:
                print 'Download %d%%.' % int(status.progress() * 100)
        print 'Download Complete'

        fo = decrypt(fh.getvalue(), key)
        fi = open(file_name, 'wb')
        fi.write(fo)
        # fh.getvalue() contains the downloaded content. Decrypt it and save it onto your local machine.
        print json.dumps(resp, indent=2)
    except client.AccessTokenRefreshError:
        print ("Error in the credentials")
def pull(bucket, local_file, metadata_only=False):
    objname = os.path.basename(local_file)
    k = None
    try:
        k = JBoxGS.connect().objects().get(bucket=bucket, object=objname).execute()
    except HttpError as err:
        if err._get_reason() != 'Not Found':
            raise(err)
        else:
            return None

    if metadata_only:
        return k
    else:
        req = JBoxGS.connect().objects().get_media(bucket=bucket, object=objname)
        fh = open(local_file, "wb")
        downloader = MediaIoBaseDownload(fh, req, chunksize=1024 * 1024)
        done = False
        try:
            while not done:
                _, done = downloader.next_chunk()
        finally:
            fh.close()
            if not done:
                os.remove(local_file)
        return k
def download(service, bucketName, objectName, filename):
    """
    :type service: Resource
    :type bucketName: basestring
    :type objectName: basestring
    :type filename: basestring
    """
    print 'Building download request...'
    f = file(filename, 'w')
    request = service.objects().get_media(bucket=bucketName, object=objectName)
    media = MediaIoBaseDownload(f, request, chunksize=CHUNKSIZE)

    print 'Downloading bucket: %s object: %s to file: %s' % (bucketName, objectName, filename)

    progressless_iters = 0
    done = False
    while not done:
        error = None
        try:
            progress, done = media.next_chunk()
            if progress:
                print_with_carriage_return(
                    'Download %d%%.' % int(progress.progress() * 100))
        except HttpError, err:
            error = err
            if err.resp.status < 500:
                raise
        except RETRYABLE_ERRORS, err:
            error = err
def __init__(self, row: List[str], drive_service):
    self.number = int(row[0])
    content_url = urlparse(row[1])
    summary_url = urlparse(row[4])
    repetition_material = urlparse(row[7])

    if len(summary_url.scheme) > 0:
        self.summary_url = summary_url.geturl()
    else:
        self.summary_url = None

    if len(repetition_material.scheme) > 0:
        self.repetition_material = repetition_material.geturl()
    else:
        self.repetition_material = None

    if len(content_url.scheme) > 0:
        file_id = content_url.path.split('/')[-1]
        if file_id == 'open':
            file_id = parse_qs(content_url.query)['id']
        request = drive_service.files().export_media(fileId=file_id, mimeType='text/plain')
        fh = io.BytesIO()
        downloader = MediaIoBaseDownload(fh, request)
        done = False
        while done is False:
            status, done = downloader.next_chunk()
        self.content = fh.getvalue().decode('utf-8')
        self.content_type = None
    else:
        self.content = None
        self.content_type = row[1]
def load_config(self):
    try:
        results = self._gdrive.files().list(
            corpora="user",
            q="parents='%s' and name='%s'" % (self._album_id, GdriveAlbum.CONFIG_FILE),
            pageSize=1, spaces='drive').execute()
    except Error:
        results = {}

    items = results.get('files', [])
    if len(items) == 0:
        self._config_file_id = ''
        self._config = {}
        # TODO new config with default values
        return

    config_item = items[0]
    self._config_file_id = config_item["id"]

    request = self._gdrive.files().get_media(fileId=self._config_file_id)
    fh = io.BytesIO()
    downloader = MediaIoBaseDownload(fh, request)
    while True:
        _, done = downloader.next_chunk()
        if done:
            break

    config = json.loads(fh.getvalue())
    if "albums" not in config:
        config["albums"] = []
    if "pictures" not in config:
        config["pictures"] = []
    self._config = config
def download(self, mime_type=None):
    """Download the content of the file from Google Drive

    Args:
        mime_type: the mime type of the file to download. see here:
            https://developers.google.com/drive/v3/web/manage-downloads#downloading_google_documents

    Returns:
        The content of the file
    """
    if mime_type is None:
        download_type = MimeTypes.get_download_type(self.meta_data['mimeType'])
    else:
        download_type = mime_type

    req = self.service.files().export_media(fileId=self.meta_data['id'], mimeType=download_type)
    data = io.BytesIO()
    downloader = MediaIoBaseDownload(data, req)
    done = False
    while not done:
        _, done = downloader.next_chunk()
    data.seek(0)
    self.content = data.read()
    return self.content
def download_file(self, file_id, write_path, page_num=None, print_details=True, output_type=None):
    file_metadata = self._files.get(
        fileId=file_id,
        fields='name, id, mimeType, modifiedTime, size').execute(num_retries=self._max_retries)
    file_title = file_metadata['name']
    modified_date = datetime.strptime(
        str(file_metadata['modifiedTime']),
        '%Y-%m-%dT%H:%M:%S.%fZ').replace(tzinfo=utc).astimezone(timezone('Asia/Singapore')).replace(tzinfo=None)
    return_data = None

    if file_metadata['mimeType'] == 'application/vnd.google-apps.spreadsheet':
        assert page_num is not None
        download_url = 'https://docs.google.com/spreadsheets/d/%s/export?format=csv&gid=%i' % (file_id, page_num)
        resp, content = self._service._http.request(download_url)
        if resp.status == 200:
            if output_type is not None:
                assert output_type in ('dataframe', 'list')
                from io import BytesIO
                with BytesIO(content) as file_buffer:
                    if output_type == 'list':
                        import unicodecsv as csv
                        return_data = list(csv.reader(file_buffer))
                    elif output_type == 'dataframe':
                        import pandas as pd
                        return_data = pd.read_csv(file_buffer)
            else:
                with open(write_path, 'wb') as write_file:
                    write_file.write(content)
            logging_string = '[Drive] Downloaded %s [%s]. Last Modified: %s' % (file_title, file_id, modified_date)
        else:
            raise HttpError(resp, content)
    else:
        request = self._files.get_media(fileId=file_id)
        with open(write_path, 'wb') as write_file:
            downloader = MediaIoBaseDownload(write_file, request)
            done = False
            while done is False:
                status, done = downloader.next_chunk()
        file_size = humanize.naturalsize(int(file_metadata['size']))
        logging_string = '[Drive] Downloaded %s [%s] (%s). Last Modified: %s' % (file_title, file_id, file_size, modified_date)

    if print_details:
        print '\t' + logging_string
    if self._logger is not None:
        self._logger.info(logging_string)
    return return_data
def download_file(self, local_file_path, bucket_name, storage_file_path):
    f = file(local_file_path, 'wb')
    request = self.service.objects().get_media(bucket=bucket_name, object=storage_file_path)
    media = MediaIoBaseDownload(f, request)
    base_name = os.path.basename(local_file_path)

    done = False
    while not done:
        progress, done = media.next_chunk()
        if progress:
            print('{0} is download {1}/100'.format(base_name, int(100 * progress.progress())))
def stream(self, uri, file=None):
    file = file or self._get(uri, fields='files(id)')
    if file:
        output = tempfile.TemporaryFile()
        request = self.service.files().get_media(fileId=file.id)
        downloader = MediaIoBaseDownload(output, request)
        done = False
        while not done:
            status, done = downloader.next_chunk()
        output.seek(0)
        return output
def main():
    drive_service = driveClient()
    file_id = '1pE6ZovOBy4koVwWvKwE_Pt-7whVsYRKj'
    request = drive_service.files().get_media(fileId=file_id)
    fh = io.FileIO('faw.csv', 'wb')
    downloader = MediaIoBaseDownload(fh, request)
    done = False
    while done is False:
        status, done = downloader.next_chunk()
        print(f'Download {int(status.progress() * 100)}')
def get_file(name, id):
    import io
    download_service = discovery.build('drive', 'v3', developerKey=_dev_key, http=decorator.http())
    request = download_service.files().get_media(fileId=id)
    fh = io.BytesIO()
    downloader = MediaIoBaseDownload(fh, request)
    done = False
    while not done:
        status, done = downloader.next_chunk()
    fh.seek(0)
    return fh.read()
def get_contents_to_fileobj(self, key, fileobj_to_store_to):
    key = self.format_key_for_backend(key)
    self.log.debug("Starting to fetch the contents of: %r to %r", key, fileobj_to_store_to)

    with self._object_client(not_found=key) as clob:
        req = clob.get_media(bucket=self.bucket_name, object=key)
        download = MediaIoBaseDownload(fileobj_to_store_to, req, chunksize=CHUNK_SIZE)
        done = False
        while not done:
            status, done = download.next_chunk()
            if status:
                self.log.debug("Download of %r: %d%%", key, status.progress() * 100)
        return self._metadata_for_key(clob, key)
def test_media_io_base_download_retries_5xx(self):
    self.request.http = HttpMockSequence([
        ({'status': '500'}, ''),
        ({'status': '500'}, ''),
        ({'status': '500'}, ''),
        ({'status': '200', 'content-range': '0-2/5'}, '123'),
        ({'status': '503'}, ''),
        ({'status': '503'}, ''),
        ({'status': '503'}, ''),
        ({'status': '200', 'content-range': '3-4/5'}, '45'),
    ])
    download = MediaIoBaseDownload(
        fd=self.fd, request=self.request, chunksize=3)

    self.assertEqual(self.fd, download._fd)
    self.assertEqual(3, download._chunksize)
    self.assertEqual(0, download._progress)
    self.assertEqual(None, download._total_size)
    self.assertEqual(False, download._done)
    self.assertEqual(self.request.uri, download._uri)

    # Set time.sleep and random.random stubs.
    sleeptimes = []
    download._sleep = lambda x: sleeptimes.append(x)
    download._rand = lambda: 10

    status, done = download.next_chunk(num_retries=3)

    # Check for exponential backoff using the rand function above.
    self.assertEqual([20, 40, 80], sleeptimes)

    self.assertEqual(self.fd.getvalue(), '123')
    self.assertEqual(False, done)
    self.assertEqual(3, download._progress)
    self.assertEqual(5, download._total_size)
    self.assertEqual(3, status.resumable_progress)

    # Reset time.sleep stub.
    del sleeptimes[0:len(sleeptimes)]

    status, done = download.next_chunk(num_retries=3)

    # Check for exponential backoff using the rand function above.
    self.assertEqual([20, 40, 80], sleeptimes)

    self.assertEqual(self.fd.getvalue(), '12345')
    self.assertEqual(True, done)
    self.assertEqual(5, download._progress)
    self.assertEqual(5, download._total_size)
def _open(self, name, mode):
    if mode != "rb":
        raise ValueError("rb is the only acceptable mode for this backend")
    # @@@ reading files from GCS is extremely inefficient; fix me
    # however, for small files, who cares right? ;-)
    req = self.client.objects().get_media(bucket=self.bucket, object=name)
    buf = io.BytesIO()
    media = MediaIoBaseDownload(buf, req)
    done = False
    while not done:
        done = media.next_chunk()[1]
    buf.seek(0)
    return buf
def download_file(service, file_id, file_path):
    """Download file to provided path

    :param service: Google Drive service object
    :param str file_id: ID of the file to download
    :param str file_path: path to download the file to
    """
    request = service.files().get_media(fileId=file_id)
    with open(file_path, mode='wb') as f:
        downloader = MediaIoBaseDownload(f, request)
        done = False
        while done is False:
            status, done = downloader.next_chunk()
def read_file(file_id):
    # media_body = MediaIoBaseUpload(fh, mimetype='application/octet-stream', chunksize=1024 * 1024, resumable=False)
    drive_service = create_drive_service()
    request = drive_service.files().get_media(fileId=file_id)
    fh = io.BytesIO()
    downloader = MediaIoBaseDownload(fh, request)
    done = False
    data = ""
    while done is False:
        status, done = downloader.next_chunk()
        print "Download %d%%." % int(status.progress() * 100)
    return fh.getvalue()
def test_media_io_base_download_retries_connection_errors(self):
    self.request.http = HttpMockWithErrors(
        3, {'status': '200', 'content-range': '0-2/3'}, b'123')

    download = MediaIoBaseDownload(
        fd=self.fd, request=self.request, chunksize=3)
    download._sleep = lambda _x: 0  # do nothing
    download._rand = lambda: 10

    status, done = download.next_chunk(num_retries=3)

    self.assertEqual(self.fd.getvalue(), b'123')
    self.assertEqual(True, done)
def test_media_io_base_download_handle_redirects(self):
    self.request.http = HttpMockSequence([
        ({'status': '200',
          'content-location': 'https://secure.example.net/lion'}, ''),
        ({'status': '200', 'content-range': '0-2/5'}, 'abc'),
    ])

    download = MediaIoBaseDownload(
        fd=self.fd, request=self.request, chunksize=3)

    status, done = download.next_chunk()

    self.assertEqual('https://secure.example.net/lion', download._uri)
def download_object(self, bucket_name, object_name, write_path):
    """
    Downloads object in chunks.

    :param bucket_name: Bucket identifier.
    :type bucket_name: string
    :param object_name: Can take string representation of object resource
        or list denoting path to object on GCS.
    :type object_name: list or string
    :param write_path: Local path to write object to.
    :type write_path: string
    :returns: GcsResponse object.
    :raises: HttpError if non-retryable errors are encountered.
    """
    resp_obj = GcsResponse('downloaded')

    req = self._service.objects().get_media(
        bucket=bucket_name,
        object=self._parse_object_name(object_name)
    )

    write_file = file(write_path, 'wb')
    media = MediaIoBaseDownload(write_file, req, chunksize=self._chunksize)

    progressless_iters = 0
    done = False
    while not done:
        error = None
        try:
            progress, done = media.next_chunk()
        except HttpError as e:
            error = e
            if e.resp.status < 500:
                raise
        except self._RETRYABLE_ERRORS as e:
            error = e

        if error:
            progressless_iters += 1
            self._handle_progressless_iter(error, progressless_iters)
        else:
            progressless_iters = 0

    resp_obj.load_resp(
        self.get_object(bucket_name, object_name),
        is_download=True
    )

    return resp_obj
def get_contents_to_fileobj(self, key, fileobj_to_store_to):
    key = self.format_key_for_backend(key)
    request = self.gs_objects.get_media(bucket=self.bucket_name, object=key)
    download = MediaIoBaseDownload(fileobj_to_store_to, request, chunksize=CHUNK_SIZE)
    done = False
    while not done:
        try:
            status, done = download.next_chunk()
        except HttpError as ex:
            if ex.resp["status"] == "404":
                raise FileNotFoundFromStorageError(key)
            raise
        if status:
            self.log.debug("Download of %r: %d%%", key, status.progress() * 100)
    return self._metadata_for_key(key)
def download_report(youtube_reporting, report_url, local_file):
    request = youtube_reporting.media().download(
        resourceName=' '
    )
    request.uri = report_url
    fh = FileIO(local_file, mode='wb')
    # Stream/download the report in a single request.
    downloader = MediaIoBaseDownload(fh, request, chunksize=-1)

    done = False
    while done is False:
        status, done = downloader.next_chunk()
        if status:
            print 'Download %d%%.' % int(status.progress() * 100)
    print 'Download Complete!'
def get_contents_to_file(self, obj_key, filepath_to_store_to):
    self.log.debug("Starting to fetch the contents of: %r to: %r", obj_key, filepath_to_store_to)
    fileobj = FileIO(filepath_to_store_to, mode="wb")
    try:
        done = False
        request = self.gs_objects.get_media(bucket=self.bucket_name, object=obj_key)
        download = MediaIoBaseDownload(fileobj, request, chunksize=CHUNK_SIZE)
        while not done:
            status, done = download.next_chunk()
            if status:
                self.log.debug("Download of %r to %r: %d%%", obj_key, filepath_to_store_to,
                               status.progress() * 100)
    finally:
        fileobj.close()
        if not done:
            os.unlink(filepath_to_store_to)
def export_file_as_str(self, *, fileId: str) -> str:
    file_handler = io.BytesIO()
    request = self.service.files().export_media(
        fileId=fileId,
        mimeType='text/csv',
    )
    downloader = MediaIoBaseDownload(file_handler, request)
    done = False
    while done is False:
        status, done = downloader.next_chunk()
        print('Downloaded {}%'.format(int(status.progress() * 100)))
    return file_handler.getvalue().decode('utf-8')
def read_file_from_gcs(service, bucket_id, file_id):
    """Reads the bucket object and gets its contents

    We are getting the StringIO value
    """
    req = service.objects().get_media(
        bucket=bucket_id,
        object=file_id)
    try:
        fh = io.BytesIO()
        downloader = MediaIoBaseDownload(fh, req, chunksize=1024 * 1024)
        done = False
        while not done:
            status, done = downloader.next_chunk()
    # throws HttpError if this file is not found
    except HttpError, e:
        print >> sys.stderr, e
def file_download(self, item_id: str, item_name: str, dir_name: str) -> bool:
    """
    Download file from Google Drive
    :param item_id:
    :param item_name:
    :param dir_name:
    :return:
    """
    service = self.__get_service()
    request = service.files().get_media(fileId=item_id)
    self.__create_download_dir(dir_name)
    fh = io.FileIO(os.path.join(dir_name, item_name), mode='wb')
    downloader = MediaIoBaseDownload(fh, request)
    done = False
    while done is False:
        status, done = downloader.next_chunk()
    return done
def _open(self, name, mode):
    if mode != "rb":
        raise ValueError("rb is the only acceptable mode for this backend")

    req = self.client.objects().get_media(bucket=self.bucket, object=self._prefixed_name(name))
    buf = self._open_io()
    media = MediaIoBaseDownload(buf, req)
    done = False
    try:
        while not done:
            done = media.next_chunk(num_retries=self.num_retries)[1]
    except HttpError as exc:
        if exc.resp["status"] == "404":
            raise IOError('object "{}/{}" does not exist'.format(self.bucket, self._prefixed_name(name)))
        else:
            raise IOError("unknown HTTP error: {}".format(exc))
    buf.seek(0)
    return GCSFile(buf, name, self)
def drive_pull_media(drive_fid, service):
    """Retrieve the content of a Google Drive file"""
    logger = logging.getLogger(__name__)
    # Open a Python file object based on the inherited Lustre file descriptor
    with os.fdopen(args.fd, 'wb') as lustre_file:
        # Get the file content by Google fileId
        request = service.files().get_media(fileId=drive_fid)
        downloader = MediaIoBaseDownload(lustre_file, request, chunksize=GAPI_MEDIA_IO_CHUNK_SIZE)
        # Download chunk by chunk
        status, done = downloader.next_chunk()
        while done is False:
            status, done = downloader.next_chunk()
            if status:
                logger.debug("Download %d%%", int(status.progress() * 100))
def download_file(file_id) -> None:
    """
    Downloads a file from Google Drive

    Args:
        - file_id (str): file id to download
    Returns:
        N/A
    """
    # must have your custom creds
    creds = ServiceAccountCredentials.from_json_keyfile_name(
        "gdrive_creds.json", scope)
    service = build("drive", "v3", credentials=creds)
    request = service.files().export_media(fileId=file_id, mimeType="text/csv")
    print(request)
    fh = open("dc_fss.csv", "wb")
    downloader = MediaIoBaseDownload(fh, request)
    done = False
    while done is False:
        status, done = downloader.next_chunk()
        print("Download %d%%." % int(status.progress() * 100))
def test_media_io_base_download_empty_file(self):
    self.request.http = HttpMockSequence([
        ({'status': '200', 'content-range': '0-0/0'}, b''),
    ])

    download = MediaIoBaseDownload(
        fd=self.fd, request=self.request, chunksize=3)

    self.assertEqual(self.fd, download._fd)
    self.assertEqual(0, download._progress)
    self.assertEqual(None, download._total_size)
    self.assertEqual(False, download._done)
    self.assertEqual(self.request.uri, download._uri)

    status, done = download.next_chunk()

    self.assertEqual(True, done)
    self.assertEqual(0, download._progress)
    self.assertEqual(0, download._total_size)
    self.assertEqual(0, status.progress())
def doc_googlfy(ext, file_id, service):
    request = service.files().get_media(fileId=file_id)
    fh = io.BytesIO()
    downloader = MediaIoBaseDownload(fh, request)
    done = False
    while done is False:
        status, done = downloader.next_chunk()
    with open("documents/tempdoc." + ext, 'wb') as out:
        out.write(fh.getvalue())

    file_metadata = {
        'name': 'Temp File',
        'mimeType': 'application/vnd.google-apps.' + extensions[ext]
    }
    media = MediaFileUpload("documents/tempdoc." + ext, resumable=True)
    convertFile = service.files().create(body=file_metadata,
                                         media_body=media,
                                         fields='id').execute()
    file_id = convertFile['id']
    return file_id
def get_contents_to_fileobj(self, key, fileobj_to_store_to):
    key = self.format_key_for_backend(key)
    self.log.debug("Starting to fetch the contents of: %r to %r", key, fileobj_to_store_to)
    request = self.gs_objects.get_media(bucket=self.bucket_name, object=key)
    download = MediaIoBaseDownload(fileobj_to_store_to, request, chunksize=CHUNK_SIZE)
    done = False
    while not done:
        try:
            status, done = download.next_chunk()
        except HttpError as ex:
            if ex.resp["status"] == "404":
                raise FileNotFoundFromStorageError(key)
            raise
        if status:
            self.log.debug("Download of %r: %d%%", key, status.progress() * 100)
    return self._metadata_for_key(key)
def download_file(service, file_id, location, filename, mime_type):
    if 'vnd.google-apps' in mime_type:
        request = service.files().export_media(fileId=file_id,
                                               mimeType='application/pdf')
        filename += '.csv'
    else:
        request = service.files().get_media(fileId=file_id)
    fh = io.FileIO(location + filename, 'wb')
    downloader = MediaIoBaseDownload(fh, request, 1024 * 1024 * 1024)
    done = False
    while done is False:
        try:
            status, done = downloader.next_chunk()
        except:
            fh.close()
            os.remove(location + filename)
            sys.exit(1)
        print(f'\rDownload {int(status.progress() * 100)}%.', end='')
        sys.stdout.flush()
    print('')
def download_file(service, filename):
    phrase = "name contains '" + filename + "'"
    # Call the Drive v3 API
    results = service.files().list(
        q=phrase, pageSize=10, fields="nextPageToken, files(id, name)").execute()
    print(results)

    import io
    from googleapiclient.http import MediaIoBaseDownload

    # if you get the shareable link, the link contains this id, replace the file_id below
    file_id = results['files'][0]['id']
    request = service.files().get_media(fileId=file_id)
    # replace the filename and extension in the first field below
    fh = io.FileIO(filename, mode='w')
    downloader = MediaIoBaseDownload(fh, request)
    done = False
    while done is False:
        status, done = downloader.next_chunk()
        print("Download %d%%." % int(status.progress() * 100))
    return file_id
def _download(service, key, fp: Union[IO, BinaryIO], max_bytes_per_second: int):
    request = service.files().get_media(fileId=key)
    chunk_size = get_chunk_size(max_bytes_per_second)
    downloader = MediaIoBaseDownload(fp, request, chunksize=chunk_size)
    done = False
    prev_time = time.perf_counter()
    while not done:
        status, done = downloader.next_chunk()
        if not done and max_bytes_per_second:
            current_time = time.perf_counter()
            actual_speed = chunk_size / (current_time - prev_time)
            extra_time = actual_speed / max_bytes_per_second - 1
            if extra_time > 0:
                time.sleep(extra_time)
            prev_time = current_time
def download_files(self, files, local_path):
    # convert to pathlib
    local_path = Path(local_path)

    # start service
    service = self.service

    # write each file
    for file in files:
        request = service.files().get_media(fileId=file['id'])
        fh = io.BytesIO()
        downloader = MediaIoBaseDownload(fh, request)
        done = False
        while done is False:
            status, done = downloader.next_chunk()
        with local_path.joinpath(file['name']).open('wb') as f:
            f.write(fh.getvalue())
    return
def download_content_from_request(file_handle, request: dict, chunk_size: int) -> None:
    """
    Download media resources.

    Note that the Python file object is compatible with io.Base and can be used with this class also.

    :param file_handle: io.Base or file object. The stream in which to write the downloaded bytes.
    :type file_handle: io.Base or file object
    :param request: googleapiclient.http.HttpRequest, the media request to perform in chunks.
    :type request: Dict
    :param chunk_size: int, File will be downloaded in chunks of this many bytes.
    :type chunk_size: int
    """
    downloader = MediaIoBaseDownload(file_handle, request, chunksize=chunk_size)
    done = False
    while done is False:
        _, done = downloader.next_chunk()
    file_handle.flush()
def main(): """get the file in terms of docx... """ creds = None # The file token.json stores the user's access and refresh tokens, and is # created automatically when the authorization flow completes for the first # time. if os.path.exists('token.json'): creds = Credentials.from_authorized_user_file('token.json', SCOPES) print(os.listdir()) drive = build('drive', 'v3', credentials=creds) request = drive.files().export_media(fileId='1aKf0ffoL7XjR26npjBmNcKClULSlGDIrlQx_E8dNXxI', mimeType='application/pdf') fh = io.BytesIO() downloader = MediaIoBaseDownload(fh, request) done = False while done is False: status, done = downloader.next_chunk() print("Download %d%%." % int(status.progress() * 100)) fh.seek(0) with open('PDFs/CV_mcdanal.pdf', 'wb') as f: # it's only been loaded into RAM! shutil.copyfileobj(fh, f, length=131072)
def download_File_after_searching_Api(drive_service, file_name, filepath):
    logger.debug(drive_service)
    file_id = get_list_of_file_from_drive_based_on_query(
        drive_service, 5, f"name contains '{file_name}'")[0]['id']
    res = {}
    request = drive_service.files().get_media(fileId=file_id)
    fh = io.BytesIO()
    downloader = MediaIoBaseDownload(fh, request)
    done = False
    try:
        while done is False:
            status, done = downloader.next_chunk()
            logger.debug("Download %d%%." % int(status.progress() * 100))
        with io.open(filepath, 'wb') as f:
            fh.seek(0)
            f.write(fh.read())
        res = {'Message': 'Downloaded to local Drive successfully'}
    except Exception as e:
        res = {'Error': f'Error in Download {e}'}
    return res
def download_file(self, fileId, filePath, filedstmimytype=''):
    # Note: The parent folders in filePath must exist
    print("-> Downloading file with id: {0} name: {1}".format(fileId, filePath))
    request = self.__service.files().export(fileId=fileId, mimeType=filedstmimytype)
    fh = io.FileIO(filePath, mode='wb')
    try:
        downloader = MediaIoBaseDownload(fh, request, chunksize=1024 * 1024)
        done = False
        while done is False:
            status, done = downloader.next_chunk(num_retries=2)
            if status:
                print("Download %d%%." % int(status.progress() * 100))
        print("Download Complete!")
    finally:
        fh.close()
def test_media_io_base_download_unknown_media_size(self):
    self.request.http = HttpMockSequence([
        ({'status': '200'}, b'123')
    ])

    download = MediaIoBaseDownload(
        fd=self.fd, request=self.request, chunksize=3)

    self.assertEqual(self.fd, download._fd)
    self.assertEqual(0, download._progress)
    self.assertEqual(None, download._total_size)
    self.assertEqual(False, download._done)
    self.assertEqual(self.request.uri, download._uri)

    status, done = download.next_chunk()

    self.assertEqual(self.fd.getvalue(), b'123')
    self.assertEqual(True, done)
    self.assertEqual(3, download._progress)
    self.assertEqual(None, download._total_size)
    self.assertEqual(0, status.progress())
def get_summary_file(gridname):
    global grid_list, driveId
    file_name = grid_list['summary_file'][grid_list['name'] == gridname].iloc[0]
    if os.path.isfile('temp/' + file_name):
        print('loading local grid: ', file_name)
        data = pd.read_csv('temp/' + file_name)
    else:
        file_id = grid_list['summary_file_id'][grid_list['name'] == gridname].iloc[0]
        request = service.files().get_media(fileId=file_id)
        with tempfile.NamedTemporaryFile() as temp:
            downloader = MediaIoBaseDownload(temp, request)
            done = False
            while done is False:
                status, done = downloader.next_chunk()
            data = pd.read_csv(temp.name)
    return data
def _download_file(self, path: str, name: str, **kwargs) -> None:
    request = self._service.files().get_media(fileId=kwargs['id'], supportsTeamDrives=True)
    with io.FileIO(os.path.join(path, name), 'wb') as d_f:
        d_file_obj = MediaIoBaseDownload(d_f, request, chunksize=50 * 1024 * 1024)
        c_time = time.time()
        done = False
        while done is False:
            status, done = d_file_obj.next_chunk()
            if self._is_canceled:
                raise ProcessCanceled
            if status:
                f_size = status.total_size
                diff = time.time() - c_time
                downloaded = status.resumable_progress
                percentage = downloaded / f_size * 100
                speed = round(downloaded / diff, 2)
                eta = round((f_size - downloaded) / speed)
                tmp = \
                    "__Downloading From GDrive...__\n" + \
                    "```[{}{}]({}%)```\n" + \
                    "**File Name** : `{}`\n" + \
                    "**File Size** : `{}`\n" + \
                    "**Downloaded** : `{}`\n" + \
                    "**Completed** : `{}/{}`\n" + \
                    "**Speed** : `{}/s`\n" + \
                    "**ETA** : `{}`"
                self._progress = tmp.format(
                    "".join(["█" for i in range(math.floor(percentage / 5))]),
                    "".join(["░" for i in range(20 - math.floor(percentage / 5))]),
                    round(percentage, 2),
                    name,
                    humanbytes(f_size),
                    humanbytes(downloaded),
                    self._completed,
                    self._list,
                    humanbytes(speed),
                    time_formatter(eta))
    self._completed += 1
    _LOG.info("Downloaded Google-Drive File => Name: %s ID: %s", name, kwargs['id'])
def main():
    # Disable OAuthlib's HTTPS verification when running locally.
    # *DO NOT* leave this option enabled in production.
    os.environ["OAUTHLIB_INSECURE_TRANSPORT"] = "1"

    api_service_name = "youtube"
    api_version = "v3"
    client_secrets_file = "thi.json"

    # Get credentials and create an API client
    # flow = google_auth_oauthlib.flow.InstalledAppFlow.from_client_secrets_file(
    #     client_secrets_file, scopes)
    # credentials = flow.run_console()
    # pickle.dump(credentials, open("token.pkl", "wb"))
    credentials = pickle.load(open("token.pkl", "rb"))
    youtube = googleapiclient.discovery.build(
        api_service_name, api_version, credentials=credentials)

    request = youtube.captions().list(
        videoId="Q2aEzeMDHMA",
        part="snippet"
    )
    response = request.execute()
    print(response['items'][0]['id'])
    id1 = response['items'][0]['id']

    request = youtube.captions().download(
        id=id1,
        tfmt="sbv"
    )
    # TODO: For this request to work, you must replace "YOUR_FILE"
    # with the location where the downloaded content should be written.
    fh = io.FileIO("cvfg.txt", "wb")

    download = MediaIoBaseDownload(fh, request)
    complete = False
    while not complete:
        status, complete = download.next_chunk()
def get_gdrive_params():
    obj = lambda: None
    lmao = {
        "auth_host_name": 'localhost',
        'noauth_local_webserver': 'store_true',
        'auth_host_port': [8080, 8090],
        'logging_level': 'ERROR'
    }
    for k, v in lmao.items():
        setattr(obj, k, v)

    # authorization boilerplate code
    SCOPES = 'https://www.googleapis.com/auth/drive.readonly'
    store = file.Storage('token.json')
    creds = store.get()
    # The following will give you a link if token.json does not exist, the link
    # allows the user to give this app permission
    if not creds or creds.invalid:
        flow = client.flow_from_clientsecrets('client_id.json', SCOPES)
        creds = tools.run_flow(flow, store, obj)
    DRIVE = discovery.build('drive', 'v3', http=creds.authorize(Http()))

    # ID of shareable link
    # file_id = '1Ht1Q_ZNcg7PXjwovVI-RLt-ZuUSAaW0wmFWS0OKdSgw'
    file_id = '1-2Cj3U7avgbnsjQEZuDwe8q9wRL4zL3D'
    request = DRIVE.files().get_media(fileId=file_id)
    # replace the filename and extension in the first field below
    fh = io.FileIO('MAR_VAD_params.txt', mode='w')
    downloader = MediaIoBaseDownload(fh, request)
    done = False
    while done is False:
        status, done = downloader.next_chunk()
        print("Download %d%%." % int(status.progress() * 100))

    params = []
    with open("MAR_VAD_params.txt", 'r', encoding="utf-8-sig") as f:
        for line in f:
            params.append(float(line))
    # params[0] = VAD_agg and params[1] = MOUTH_AR_THRESHOLD
    return int(params[0]), params[1]
def downloadFile(downloadLocation, filename, autoOpen=False):
    """Download specified file inside my google drive account to the
    specified working directory
    """
    store = file.Storage('token.json')
    creds = store.get()
    if not creds or creds.invalid:
        flow = client.flow_from_clientsecrets('credentials.json', SCOPES)
        creds = tools.run_flow(flow, store)
    service = build('drive', 'v3', http=creds.authorize(Http()))

    # use contains instead of = so the user doesn't have to specify the file extension
    queryString = """name contains '{0}'""".format(filename)
    request = service.files().list(pageSize=10, fields="*", q=queryString)
    results = request.execute()
    items = results.get('files', [])
    if len(items) == 0:
        print('no such file')
    elif len(items) > 1:
        print('more than 1 such file')
    else:
        fullname = items[0]['name']
        file_id = items[0]['id']
        mime_type = items[0]['mimeType']
        targeLocation = os.path.join(downloadLocation, fullname)
        # print(fullname)
        # print(downloadPath)
        request = service.files().get_media(fileId=file_id)
        fh = io.FileIO(targeLocation, 'wb')
        downloader = MediaIoBaseDownload(fh, request)
        done = False
        while done is False:
            status, done = downloader.next_chunk()
            print("Download %d%%." % int(status.progress() * 100))
        if done:
            fh.close()
            if autoOpen:
                if platform.system() == 'Mac':
                    subprocess.call(['open', targeLocation], shell=True)
                elif platform.system() == 'Windows':
                    subprocess.call(['start', targeLocation], shell=True)
def download_file(self, file_id, file_path):
    """Downloads a file from Drive, but not a Google Workspace document.

    Args:
        file_id (string): ID of the document to download.
        file_path (path or string): Path to the file.
    """
    if not file_id:
        raise Exception("File ID is required")
    if not file_path:
        raise Exception("File path is required")

    _file_path = pathlib.Path(file_path)
    request = None
    service = self.conn()
    try:
        request = service.files().get_media(fileId=file_id)
    except HttpError as error:
        LogHandler.drive_logs(error)
        service.close()
        return False

    fh = io.FileIO(str(_file_path), mode='wb')
    downloader = MediaIoBaseDownload(fh, request)
    done = False
    while done is False:
        status, done = downloader.next_chunk()
        # print(f"{int(status.progress() * 100)}")
    service.close()
    fh.close()
    return file_path
def download_all_files(service, ids, names, dir):
    """Downloads all files specified by id

    Args:
        service: Drive API service instance
        ids: list of ids corresponding to files to download
        names: list of file names corresponding to the ids
        dir: directory to download files to

    Returns:
        Downloads all files to specified directory
    """
    # check if specified directory exists
    # if not, create it
    path = os.path.join(getcwd(), dir)
    if not os.path.isdir(path):
        print('creating directory %s' % path)
        os.mkdir(path)

    # directory must exist
    assert os.path.isdir(path)
    # length of ids and names is equal
    assert len(ids) == len(names)

    length = len(ids)
    i = 1
    for id, name in zip(ids, names):
        request = service.files().get_media(fileId=id)
        fh = FileIO(os.path.join(path, name), mode='wb')
        downloader = MediaIoBaseDownload(fh, request)
        print('downloading file %d/%d' % (i, length))
        done = False
        while done is False:
            status, done = downloader.next_chunk()
            print("Download %d%%." % int(status.progress() * 100))
        i += 1
def download_video_nvr(room, date, time, filename=None, need_folder=False):
    try:
        rooms = pickle.loads(open("rooms.pickle", "rb").read())
    except:
        raise Exception("No file containing rooms' ids")
    room_id = rooms[room][0]
    tag = rooms[room][1]

    results = look_into_drive(room_id, date)
    if len(results['files']) > 1:
        raise Exception("More than one directory on Google drive")
    elif len(results['files']) == 0:
        raise Exception("No files found on drive")

    time_id = results['files'][0]['id']
    results = look_into_drive(time_id, time)
    if len(results['files']) > 1:
        raise Exception("More than one directory on Google drive")
    elif len(results['files']) == 0:
        raise Exception("No files found on drive")

    if tag is not None:
        results = look_into_drive(results['files'][0]['id'],
                                  date + "_" + time + "_" + room + "_" + tag)
    if filename is None:
        [hour, minute] = time.split(":")
        filename = "queue/" + date + "_" + hour + "-" + minute + "_" + room + "_" + tag + ".mp4"
    if len(results['files']) > 1:
        raise Exception("More than one file on Google drive")
    elif len(results['files']) == 0:
        raise Exception("No files found on drive")

    request = service.files().get_media(fileId=results['files'][0]['id'])
    fh = io.FileIO(filename, 'wb')
    downloader = MediaIoBaseDownload(fh, request)
    done = False
    while not done:
        status, done = downloader.next_chunk()
        print("Download process is %d%%. " % int(status.progress() * 100))

    if need_folder:
        return results['files'][0]['parents']
    else:
        return filename
def get_file(file):
    try:
        request = drive_service.files().get_media(fileId=file.id)
        fh = BytesIO()
        downloader = MediaIoBaseDownload(fd=fh, request=request)
        done = False
        while not done:
            status, done = downloader.next_chunk()
            print("Download %d%%." % int(status.progress() * 100))
    except:
        request = drive_service.files().export_media(fileId=file.id, mimeType=file.mimeType)
        fh = BytesIO()
        downloader = MediaIoBaseDownload(fd=fh, request=request)
        done = False
        while not done:
            status, done = downloader.next_chunk()
            print("Download %d%%." % int(status.progress() * 100))

    fh.seek(0, os.SEEK_END)
    print(fh.tell())
    fh.seek(0)
    fh.name = file['name']
    # with open(os.path.join(f"./../Carpeta/{fh.name}"), "wb") as f:
    #     f.write(fh.read())
    #     f.close()
    return fh
def _get_dataset_data(creds):
    # Connect to the API service
    service = build('drive', 'v3', credentials=creds)

    # Request the file id from the API (string values must be quoted in Drive queries).
    query_string = f"name = '{DATASET_FILE_NAME}'"
    results = service.files().list(q=query_string, fields="files(id, name)").execute()
    items = results.get('files', [])
    if len(items) == 0:
        raise NameError(
            f"Could not find file with the name <{DATASET_FILE_NAME}> on the drive."
        )
    if len(items) > 1:
        raise NameError(
            f"More than one file with the name <{DATASET_FILE_NAME}> found on the drive."
        )

    request = service.files().get_media(fileId=items[0].get('id'))
    fh = io.BytesIO()
    # Initialise a downloader object to download the file
    downloader = MediaIoBaseDownload(fh, request, chunksize=204800)
    done = False
    try:
        # Download the data in chunks:
        while not done:
            _, done = downloader.next_chunk()
        fh.seek(0)
        # csv.reader expects text, so decode the downloaded bytes first.
        raw_data = list(csv.reader(io.TextIOWrapper(fh, encoding='utf-8')))
        data = raw_data[1:]  # Dropping the header.
        return data
    except:
        # TODO: Maybe we should handle some exceptions?
        raise
def download_media(config, auth, resource_name):
    if config.verbose:
        print('SDF: Start Download')

    downloadRequest = API_DV360(config, auth).media().download_media(
        resourceName=resource_name).execute(run=False)

    # Create output stream for downloaded file
    outStream = io.BytesIO()

    # Make downloader object
    downloader = MediaIoBaseDownload(outStream, downloadRequest)

    # Download media file in chunks until finished
    download_finished = False
    while download_finished is False:
        _, download_finished = downloader.next_chunk()

    if config.verbose:
        print('SDF: End Download')

    return outStream
def parse_json(service, file):
    request = service.files().get_media(fileId=file['id'])
    svfile = open(file['title'], mode="wb")
    media_request = MediaIoBaseDownload(svfile, request)
    download_progress, done = media_request.next_chunk()
    svfile.close()

    with open(file['title'], mode="r") as jsfile:
        data = json.load(jsfile)
        x = []
        for val in data['valueList']:
            raw = val['input']
            d = [raw['Chart_1'], raw['Chart_2'], raw['Chart_3']]
            x.append(d)
        jsfile.close()
    os.remove(file['title'])
    return x
def __download_file(self, file_id, path, filename, mime_type):
    request = self.__service.files().get_media(fileId=file_id)
    filename = filename.replace('/', '')
    fh = FileIO('{}{}'.format(path, filename), 'wb')
    downloader = MediaIoBaseDownload(fh, request, chunksize=50 * 1024 * 1024)
    done = False
    while not done:
        if self.is_cancelled:
            fh.close()
            break
        try:
            self.dstatus, done = downloader.next_chunk()
        except HttpError as err:
            if err.resp.get('content-type', '').startswith('application/json'):
                reason = jsnloads(err.content).get('error').get('errors')[0].get('reason')
                if reason not in [
                    'downloadQuotaExceeded',
                    'dailyLimitExceeded',
                ]:
                    raise err
                if USE_SERVICE_ACCOUNTS:
                    if self.__sa_count == len(listdir("accounts")) or self.__sa_count > 50:
                        self.is_cancelled = True
                        raise err
                    else:
                        self.__switchServiceAccount()
                        LOGGER.info(f"Got: {reason}, Trying Again...")
                        return self.__download_file(file_id, path, filename, mime_type)
                else:
                    self.is_cancelled = True
                    LOGGER.info(f"Got: {reason}")
                    raise err
    self._file_downloaded_bytes = 0
def drive_download_file(drive_service, file_id, output_filename, verbose=False):
    '''
    Downloads the file with the given file ID on the user's Google Drive to
    the local file with the path `output_filename`.

    Arguments:
    - `drive_service`:
    - `file_id`:
    - `output_filename`:
    - `verbose`:
    '''
    request = drive_service.files().get_media(fileId=file_id)
    with open(output_filename, 'wb') as output_file:
        downloader = MediaIoBaseDownload(output_file, request)
        done = False
        while done is False:
            status, done = downloader.next_chunk()
            if verbose:
                logger.info("Download %d%%.", int(status.progress() * 100))
def download_file_media(
    self, file_id: str, path: Optional[str], export_format: Optional[str]
) -> Union[io.BytesIO, io.FileIO]:
    """
    response.getvalue() -> bytes
    fh.getvalue().decode("utf-8") -> string
    """
    if export_format:
        request = self.service.files().export_media(
            fileId=file_id, mimeType=export_format
        )
    else:
        request = self.service.files().get_media(fileId=file_id)
    fh = io.FileIO(path, "wb") if path else io.BytesIO()
    downloader = MediaIoBaseDownload(fh, request)
    done = False
    while done is False:
        _, done = downloader.next_chunk()
    return fh
def download_by_file_id(self, file_id, destination_fname):
    request = self.service.files().get_media(fileId=file_id)
    fh = io.BytesIO()
    downloader = MediaIoBaseDownload(fh, request)
    done = False
    try:
        # Download the data in chunks
        while not done:
            status, done = downloader.next_chunk()
        fh.seek(0)
        # Write the received data to the file
        with open(destination_fname, 'wb') as f:
            shutil.copyfileobj(fh, f)
        print("File Downloaded")
    except:
        # Return False if something went wrong
        print("Something went wrong.")
def load_image(file_id, name, tstamp):
    """Shows basic usage of the Drive v3 API.
    Prints the names and ids of the first 10 files the user has access to.
    """
    creds = None
    # The file token.pickle stores the user's access and refresh tokens, and is
    # created automatically when the authorization flow completes for the first
    # time.
    if os.path.exists('token.pickle'):
        with open('token.pickle', 'rb') as token:
            creds = pickle.load(token)
    # If there are no (valid) credentials available, let the user log in.
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                'credentials.json', SCOPES)
            creds = flow.run_local_server(port=0)
        # Save the credentials for the next run
        with open('token.pickle', 'wb') as token:
            pickle.dump(creds, token)

    service = build('drive', 'v3', credentials=creds)

    # https://developers.google.com/drive/api/v3/manage-downloads#download_a_file_stored_on_google_drive
    request = service.files().get_media(fileId=file_id)
    # https://stackoverflow.com/questions/36173356/google-drive-api-download-files-python-no-files-downloaded
    # tstamp = "images/"+tstamp.replace("/","").replace(" ","").replace(":","")+".jpg"
    curPath = os.getcwd()
    tstamp = curPath + "/../../assets/images/team/" + tstamp.replace("/", "").replace(" ", "").replace(":", "") + ".jpg"
    fh = io.FileIO(tstamp, 'wb')
    downloader = MediaIoBaseDownload(fh, request)
    done = False
    while done is False:
        status, done = downloader.next_chunk()
        print("Downloading " + name + "'s photo: %d%%." % int(status.progress() * 100))