def create_file(self, parent_id, name, modified_datetime, file_local_path):
    if os.stat(file_local_path).st_size > 0:
        # Create an upload session
        r = self._do_request(
            'post',
            http_server_utils.join_url_components([
                self._api_drive_endpoint_prefix,
                'items/{}:/{}:/createUploadSession'.format(parent_id, name)
            ]))
        file_id = self._upload_file(r.json()['uploadUrl'], file_local_path)
        self._update_file_last_modified(file_id, modified_datetime)
    else:
        # Create zero length file
        r = self._do_request(
            'put',
            http_server_utils.join_url_components([
                self._api_drive_endpoint_prefix,
                'items/{}:/{}:/content'.format(parent_id, name)
            ]))
        file_id = r.json()['id']
        self._update_file_last_modified(file_id, modified_datetime)

    self._verfiy_upload(file_local_path, file_id)
    return file_id
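# _upload_file isn't shown in this excerpt. Against a Graph
# createUploadSession URL it would PUT the file data in ranges, roughly as
# in this sketch (the standalone helper shape and chunk size are
# assumptions, not the project's actual implementation):
import os
import requests

def _upload_file_sketch(upload_url, file_local_path,
                        chunk_size=320 * 1024 * 10):
    # Upload each chunk with a Content-Range header. Graph requires chunk
    # sizes that are multiples of 320 KiB; the final PUT response carries
    # the new item's metadata, including its id.
    total = os.stat(file_local_path).st_size
    with open(file_local_path, 'rb') as f:
        start = 0
        while start < total:
            chunk = f.read(chunk_size)
            end = start + len(chunk) - 1
            r = requests.put(
                upload_url, data=chunk,
                headers={'Content-Range':
                         'bytes {}-{}/{}'.format(start, end, total)})
            r.raise_for_status()
            start = end + 1
    return r.json()['id']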
def __init__(self, account_id, config_dir_path, config_pw):
    self._config = {'account_name': account_id}
    self._config_dir_path = config_dir_path
    self._config_pw = config_pw
    self._api_drive_endpoint_prefix = http_server_utils.join_url_components(
        [MicrosoftServerData.apis_domain, 'v1.0/me/drive'])
    self._api_drive_batch_url = http_server_utils.join_url_components(
        [MicrosoftServerData.apis_domain, 'v1.0/$batch'])
    self._load_config(account_id)
def __init__(self, account_id, config_dir_path, config_pw):
    self._config = {'account_name': account_id}
    self._config_dir_path = config_dir_path
    self._config_pw = config_pw
    self._api_drive_endpoint_prefix = http_server_utils.join_url_components(
        [GoogleServerData.apis_domain, 'drive/v3'])
    self._api_upload_endpoint_prefix = http_server_utils.join_url_components(
        [GoogleServerData.apis_domain, 'upload/drive/v3'])
    self._load_config(account_id)
def get_access_tokens(scope_str, client_id, user_browser_timeout=600,
                      no_user_form=False):
    port = http_server_utils.find_free_port()
    user_form_url = http_server_utils.join_url_components(
        [MicrosoftServerData.user_form_domain,
         'common/oauth2/v2.0/authorize?scope={}&redirect_uri=http%3A//127.0.0.1%3A{}/myapp'
         '&response_type=code&response_mode=query&client_id={}'.format(
             scope_str, port, client_id)])

    GetHandler.query_dict = {}
    httpd = HTTPServer(('', port), GetHandler)
    httpd.timeout = user_browser_timeout

    with httpd:
        if no_user_form is False:
            print('In a browser, navigate to the following url and fill out '
                  'the Microsoft authorization form:')
            print(user_form_url)
        else:
            # If running a local test, we need to act like the user and send
            # the initial request
            r = requests.get(user_form_url)
            r.raise_for_status()

        httpd.handle_request()

    if 'error' in GetHandler.query_dict:
        raise ValueError('Microsoft auth error: {}'.format(
            GetHandler.query_dict['error']))

    if 'code' not in GetHandler.query_dict:
        raise ValueError(
            'Microsoft auth didn\'t return an access code - {}'.format(
                GetHandler.query_dict))

    # We have an access code, use it to get the final token data
    data = {'code': GetHandler.query_dict['code'],
            'client_id': client_id,
            'scope': scope_str,
            'redirect_uri': 'http://127.0.0.1:{}/myapp'.format(port),
            'grant_type': 'authorization_code'}
    r = requests.post(
        http_server_utils.join_url_components(
            [MicrosoftServerData.user_form_domain,
             'common/oauth2/v2.0/token']),
        data=data)
    r.raise_for_status()

    # Returned 200
    res = r.json()
    if ('access_token' not in res or 'expires_in' not in res
            or 'scope' not in res or 'refresh_token' not in res):
        raise ValueError('Malformed access token data received')

    return res
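# For later use of the returned refresh_token, the same v2.0 token endpoint
# accepts the standard OAuth2 refresh grant. A minimal sketch (the helper
# name is hypothetical; it is not part of this module):
import requests

def refresh_access_tokens_sketch(refresh_token, client_id, scope_str):
    data = {'refresh_token': refresh_token,
            'client_id': client_id,
            'scope': scope_str,
            'grant_type': 'refresh_token'}
    r = requests.post(
        http_server_utils.join_url_components(
            [MicrosoftServerData.user_form_domain,
             'common/oauth2/v2.0/token']),
        data=data)
    r.raise_for_status()
    # On success the response mirrors the initial grant: a fresh
    # access_token plus expires_in, scope and refresh_token.
    return r.json()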
def _do_direct_upload(self, file_local_path, modified_datetime,
                      parent_id=None, file_name=None, file_id=None):
    """
    Either file_id must be set (file update) or parent_id AND file_name
    must be set (new file).
    """
    # Plain requests doesn't appear to support placing a dict/string field
    # in front of the file part, so we use requests-toolbelt - this also
    # has the benefit of streaming the upload data.
    attr_dict = {
        'content_modified_at': _convert_dt_to_pcloud_string(modified_datetime)
    }
    if parent_id is not None:
        attr_dict['parent'] = {'id': str(_integer_id_from_str_id(parent_id))}
    if file_name is not None:
        attr_dict['name'] = file_name

    with open(file_local_path, 'rb') as f:
        m_encoder = MultipartEncoder(fields={
            'attributes': json.dumps(attr_dict),
            'file': (str(file_name), f)
        })

        if file_id is None:
            url = http_server_utils.join_url_components(
                [self._api_upload_endpoint_prefix, 'files/content'])
        else:
            url = http_server_utils.join_url_components([
                self._api_upload_endpoint_prefix,
                'files/{}/content'.format(_integer_id_from_str_id(file_id))
            ])

        # Note: Box's content-md5 header actually carries the file's SHA1.
        r, rx_dict = self._do_request(
            'post', url,
            params={'fields': 'id,sha1'},
            data=m_encoder,
            headers={
                'Content-Type': m_encoder.content_type,
                'content-md5': hash_utils.calc_file_sha1_hex_str(
                    file_local_path)
            })

    return _str_id_from_file_integer_id(rx_dict['entries'][0]['id'])
def _get_item_metadata(self, item_id):
    if _id_is_folder(item_id):
        r, rx_dict = self._do_request(
            'get',
            http_server_utils.join_url_components(
                [self._api_drive_endpoint_prefix, 'listfolder']),
            params={'folderid': _integer_id_from_str_id(item_id)})
    else:
        r, rx_dict = self._do_request(
            'get',
            http_server_utils.join_url_components(
                [self._api_drive_endpoint_prefix, 'stat']),
            params={'fileid': _integer_id_from_str_id(item_id)})

    return rx_dict['metadata']
def download_file_by_id(self, file_id, output_dir_path, output_filename=None):
    file_meta = self._get_file_meta(file_id)
    if output_filename is None:
        output_filename = file_meta['name']
    output_file_path = os.path.join(output_dir_path, output_filename)

    r, rx_dict = self._do_request(
        'get',
        http_server_utils.join_url_components([
            self._api_drive_endpoint_prefix,
            'files/{}/content'.format(_integer_id_from_str_id(file_id))
        ]),
        stream=True)

    with open(output_file_path, 'wb') as f:
        for chunk in r.iter_content(chunk_size=128):
            f.write(chunk)

    # Set modified time
    os.utime(output_file_path,
             times=(datetime.datetime.utcnow().timestamp(),
                    date_parser.isoparse(
                        file_meta['content_modified_at']).timestamp()))
def download_file_by_id(self, file_id, output_dir_path, output_filename=None):
    # Get the modified time and name
    file_meta = self._get_file_metadata(file_id)
    if output_filename is None:
        output_filename = file_meta['name']
    output_file_path = os.path.join(output_dir_path, output_filename)

    # Download the data.
    # Special requests mode (stream=True) for streaming large files.
    if self._refresh_token_required():
        self.refresh_token()

    with requests.get(
            http_server_utils.join_url_components(
                [self._api_drive_endpoint_prefix, 'files', file_id]),
            headers={'Authorization': self._get_auth_header()},
            params={'alt': 'media'},
            stream=True) as r:
        r.raise_for_status()

        os.makedirs(output_dir_path, exist_ok=True)
        with open(output_file_path, 'wb') as f:
            for chunk in r.iter_content(chunk_size=128):
                f.write(chunk)

    # Set modified time
    os.utime(output_file_path,
             times=(datetime.datetime.utcnow().timestamp(),
                    convert_google_string_to_utc_datetime(
                        file_meta['modifiedTime']).timestamp()))
def _get_folder_id_from_path(self, folder_path):
    """
    :param folder_path: folder path relative to server drive root.
    :return: the id of the folder, or None if the folder doesn't exist.
    """
    if StoreTree.standardise_path(folder_path) == '':
        return _box_root_folder_id

    parent_folder_id = _integer_id_from_str_id(_box_root_folder_id)
    for folder_name in StoreTree.get_path_levels(folder_path):
        # Get folders in parent folder, look for current folder
        entries = self._do_paginated_get(
            http_server_utils.join_url_components([
                self._api_drive_endpoint_prefix,
                'folders/{}/items'.format(parent_folder_id)
            ]),
            'entries',
            params={'fields': 'id,name'})

        parent_folder_id = None
        for item in entries:
            if item['name'] == folder_name:
                parent_folder_id = item['id']
                break

        if parent_folder_id is None:
            return None

    return _str_id_from_folder_integer_id(parent_folder_id)
def _verfiy_upload(self, file_local_path, file_id):
    """
    Checks that the sha1 of the local file matches that of the file on the
    server. If not, the file will be deleted and an error message logged.

    :param file_local_path: If None, will assume empty file.
    :param file_id:
    :return: True if the sha1 of the local file matches that of the file
        on the server, False otherwise.
    """
    local_sha1 = hash_utils.calc_file_sha1_hex_str(file_local_path)

    r, rx_dict = self._do_request(
        'get',
        http_server_utils.join_url_components(
            [self._api_drive_endpoint_prefix, 'checksumfile']),
        params={'fileid': _integer_id_from_str_id(file_id)})

    if 'sha1' not in rx_dict or rx_dict['sha1'] != local_sha1:
        # Hashes don't match, delete the file on the server
        self.delete_item_by_id(file_id)
        logger.error(
            'Checksums after upload of file {} to Pcloud didn\'t match, '
            'deleted the file on the server.'.format(file_local_path))
        return False

    return True
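# hash_utils.calc_file_sha1_hex_str isn't shown in this excerpt; any chunked
# SHA1 over the file contents would match the usage above. A minimal sketch
# of the assumed behavior:
import hashlib

def calc_file_sha1_hex_str_sketch(file_path, chunk_size=65536):
    # Stream the file through SHA1 in chunks so large files aren't read
    # into memory at once; return the hex digest string.
    sha1 = hashlib.sha1()
    with open(file_path, 'rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b''):
            sha1.update(chunk)
    return sha1.hexdigest()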
def create_folder(self, parent_id, name):
    """
    :param parent_id: parent folder id.
    :param name: name of new folder.
    :return: the id of the created folder.
    """
    r, rx_dict = self._do_request(
        'post',
        http_server_utils.join_url_components(
            [self._api_drive_endpoint_prefix, 'folders']),
        params={'fields': 'id'},
        json={
            'parent': {'id': _integer_id_from_str_id(parent_id)},
            'name': name
        },
        ignore_codes=[409])

    # If the folder already exists, Box returns 409 with the conflicting
    # item's id, which we can reuse.
    if r.status_code == 409 and rx_dict['code'] == 'item_name_in_use':
        return _str_id_from_folder_integer_id(
            rx_dict['context_info']['conflicts'][0]['id'])

    return _str_id_from_folder_integer_id(rx_dict['id'])
def update_file(self, file_id, modified_datetime, file_local_path):
    # Get upload url
    r, rx_dict = self._do_request(
        'get',
        http_server_utils.join_url_components(
            [self._api_drive_endpoint_prefix, 'resources/upload']),
        params={'path': file_id, 'overwrite': 'true'})

    with open(file_local_path, 'rb') as f:
        r, rx_dict = self._do_request(rx_dict['method'].lower(),
                                      rx_dict['href'], data=f)

    # Now verify the upload
    file_meta = self._get_item_metadata(file_id)
    if file_meta['md5'] != hash_utils.calc_file_md5_hex_str(file_local_path):
        logger.error(
            'Server md5 hash for file {} doesn\'t match local, '
            'deleting file on server.'.format(file_local_path))
        self.delete_item_by_id(file_id)
        return None

    # Set mod timestamp
    self._set_item_custom_mtime(file_id, modified_datetime)
    return _yandex_id_from_yandex_path(file_meta['path'])
def clear_trash(self):
    r, rx_dict = self._do_request(
        'delete',
        http_server_utils.join_url_components(
            [self._api_drive_endpoint_prefix, 'trash/resources']))

    if r.status_code == 202:
        self._wait_for_status_complete(rx_dict['href'], rx_dict['method'])
def delete_item_by_id(self, item_id):
    # Work out if it's a file or folder
    meta_dict = self._get_item_metadata(item_id)

    if meta_dict['isfolder']:
        self._do_request(
            'get',
            http_server_utils.join_url_components(
                [self._api_drive_endpoint_prefix, 'deletefolderrecursive']),
            params={'folderid': _integer_id_from_str_id(item_id)})
    else:
        self._do_request(
            'get',
            http_server_utils.join_url_components(
                [self._api_drive_endpoint_prefix, 'deletefile']),
            params={'fileid': _integer_id_from_str_id(item_id)})
def _get_file_metadata(self, item_id):
    r = self._do_request(
        'get',
        http_server_utils.join_url_components([
            self._api_drive_endpoint_prefix, 'items/{}'.format(item_id)
        ]),
        params={'select': 'id,name,fileSystemInfo,file'})
    return r.json()
def _get_file_metadata(self, file_id):
    r = self._do_request(
        'get',
        http_server_utils.join_url_components(
            [self._api_drive_endpoint_prefix, 'files', file_id]),
        params={'fields': 'name, parents, modifiedTime, md5Checksum'},
        error_500_retries=5)
    return r.json()
def _get_file_meta(self, file_id):
    r, rx_dict = self._do_request(
        'get',
        http_server_utils.join_url_components([
            self._api_drive_endpoint_prefix,
            'files/{}'.format(_integer_id_from_str_id(file_id))
        ]),
        params={'fields': _metadata_fields})
    return rx_dict
def _get_root_metadata(self):
    """
    Returns the metadata dict for the root of the server drive.
    :return:
    """
    r = self._do_request(
        'get',
        http_server_utils.join_url_components(
            [self._api_drive_endpoint_prefix, 'root']),
        params={'select': 'id,name,fileSystemInfo'})
    return r.json()
def delete_item_by_id(self, item_id):
    r, rx_dict = self._do_request(
        'delete',
        http_server_utils.join_url_components(
            [self._api_drive_endpoint_prefix, 'resources']),
        params={'path': item_id})

    if r.status_code == 202:
        self._wait_for_status_complete(rx_dict['href'], rx_dict['method'])
def do_POST(self):
    if self.testing_handle_google_token_refresh():
        return

    rx_params = json.loads(
        self.rfile.read(int(self.headers['Content-Length'])).decode())
    setattr(CreateFileHandler, 'file_name', rx_params['name'])

    self.send_success_response(
        response_content_string=json.dumps({'id': '123456'}),
        extra_headers={
            'Location': http_server_utils.join_url_components(
                [GoogleServerData.apis_domain, 'dummy_url'])
        })
def _get_root_folder(self):
    """
    :return: A {'id': , 'name': ,} dict representing the root folder.
    """
    r = self._do_request(
        'get',
        http_server_utils.join_url_components(
            [self._api_drive_endpoint_prefix, 'files', 'root']),
        error_500_retries=5)
    return r.json()
def _update_file_last_modified(self, item_id, modified_datetime):
    r = self._do_request(
        'patch',
        http_server_utils.join_url_components(
            [self._api_drive_endpoint_prefix, 'items/{}'.format(item_id)]),
        json={
            'fileSystemInfo': {
                'lastModifiedDateTime':
                    _convert_dt_to_onedrive_string(modified_datetime)
            }
        })
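# _convert_dt_to_onedrive_string isn't included in this excerpt. Graph's
# fileSystemInfo timestamps are ISO 8601 UTC strings, so an implementation
# along these lines would fit (an assumption, not the project's actual
# helper):
import datetime

def _convert_dt_to_onedrive_string_sketch(dt):
    # e.g. datetime(2020, 1, 2, 3, 4, 5) -> '2020-01-02T03:04:05.000000Z'
    return dt.astimezone(datetime.timezone.utc).strftime(
        '%Y-%m-%dT%H:%M:%S.%fZ')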
def _get_root_metadata(self):
    """
    Returns the metadata dict for the root of the server drive.
    :return:
    """
    r, rx_dict = self._do_request(
        'get',
        http_server_utils.join_url_components(
            [self._api_drive_endpoint_prefix, 'listfolder']),
        params={'folderid': 0})
    return rx_dict['metadata']
def delete_item_by_id(self, item_id):
    if _id_is_folder(item_id):
        r, rx_dict = self._do_request(
            'delete',
            http_server_utils.join_url_components([
                self._api_drive_endpoint_prefix,
                'folders/{}'.format(_integer_id_from_str_id(item_id))
            ]),
            params={'recursive': 'true'},
            ignore_codes=[503])

        if r.status_code == 503:
            logger.warning(
                'Box is taking an extended time to delete folder {}.'.format(
                    item_id))
    else:
        self._do_request(
            'delete',
            http_server_utils.join_url_components([
                self._api_drive_endpoint_prefix,
                'files/{}'.format(_integer_id_from_str_id(item_id))
            ]))
def clear_trash(self):
    entries_list = self._do_paginated_get(
        http_server_utils.join_url_components(
            [self._api_drive_endpoint_prefix, 'folders/trash/items']),
        'entries',
        params={'fields': _metadata_fields})

    for item in entries_list:
        if item['type'] == 'folder':
            self._do_request(
                'delete',
                http_server_utils.join_url_components([
                    self._api_drive_endpoint_prefix,
                    'folders/{}/trash'.format(item['id'])
                ]))
        else:
            self._do_request(
                'delete',
                http_server_utils.join_url_components([
                    self._api_drive_endpoint_prefix,
                    'files/{}/trash'.format(item['id'])
                ]))
def _set_item_custom_mtime(self, item_path, modified_datetime):
    r, rx_dict = self._do_request(
        'patch',
        http_server_utils.join_url_components(
            [self._api_drive_endpoint_prefix, 'resources']),
        params={'path': item_path},
        json={
            'custom_properties': {
                'mtime_ns': modified_datetime.timestamp()
            }
        })
def get_root_file_tree(self, root_folder_path=''):
    """
    This is a generator function. Each iteration yields an instance of
    StoreTree showing the progress so far - use the last one yielded for
    the complete tree.

    :param root_folder_path: the path to the root folder of the desired store.
    :return: StoreTree instance.
    """
    root_folder_id = self._get_folder_id_from_path(root_folder_path)
    if root_folder_id is None:
        raise ValueError(
            'Root {} doesn\'t appear to exist.'.format(root_folder_path))

    result_tree = StoreTree(root_folder_id)

    # Another provider that forces us to traverse every folder...
    stack = [_integer_id_from_str_id(root_folder_id)]
    while len(stack) > 0:
        parent_folder_id = stack.pop()

        # Get all items in the parent folder
        entries = self._do_paginated_get(
            http_server_utils.join_url_components([
                self._api_drive_endpoint_prefix,
                'folders/{}/items'.format(parent_folder_id)
            ]),
            'entries',
            params={'fields': _metadata_fields})

        for item in entries:
            if item['type'] == 'folder':
                result_tree.add_folder(
                    _str_id_from_folder_integer_id(item['id']),
                    name=item['name'],
                    parent_id=_str_id_from_folder_integer_id(
                        parent_folder_id))
                stack.append(item['id'])
            else:
                result_tree.add_file(
                    _str_id_from_file_integer_id(item['id']),
                    name=item['name'],
                    parent_id=_str_id_from_folder_integer_id(
                        parent_folder_id),
                    modified_datetime=date_parser.isoparse(
                        item['content_modified_at']),
                    file_hash=item['sha1'])

        yield result_tree
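# Because get_root_file_tree yields intermediate trees, callers that only
# want the finished result can simply drain the generator. A usage sketch
# (`provider` stands in for any of these drive classes):
def get_complete_tree_sketch(provider, root_folder_path=''):
    tree = None
    for tree in provider.get_root_file_tree(root_folder_path):
        pass  # intermediate yields could drive a progress indicator
    return tree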
def update_file(self, file_id, modified_datetime, file_local_path):
    """
    :param file_id: The id of the file to update.
    :param modified_datetime: Modified time.
    :param file_local_path:
    :return: True if successful.
    """
    if os.stat(file_local_path).st_size > 0:
        # Create an upload session
        r = self._do_request(
            'post',
            http_server_utils.join_url_components([
                self._api_drive_endpoint_prefix,
                'items/{}/createUploadSession'.format(file_id)
            ]),
            json={
                'fileSystemInfo': {
                    'lastModifiedDateTime':
                        _convert_dt_to_onedrive_string(modified_datetime)
                }
            })
        self._upload_file(r.json()['uploadUrl'], file_local_path)
        self._update_file_last_modified(file_id, modified_datetime)
    else:
        # Create zero length file
        r = self._do_request(
            'put',
            http_server_utils.join_url_components([
                self._api_drive_endpoint_prefix,
                'items/{}/content'.format(file_id)
            ]))
        self._update_file_last_modified(file_id, modified_datetime)

    self._verfiy_upload(file_local_path, file_id)
def _get_folder_path_metadata(self, folder_path):
    folder_path = _pcloud_path_standardise(folder_path)

    r, rx_dict = self._do_request(
        'get',
        http_server_utils.join_url_components(
            [self._api_drive_endpoint_prefix, 'listfolder']),
        params={'path': folder_path},
        ignore_codes=[PCLOUD_ERROR_NOT_EXIST])

    if rx_dict['result'] == PCLOUD_ERROR_NOT_EXIST:
        raise ValueError(
            'Couldn\'t find folder with path {}'.format(folder_path))

    return rx_dict['metadata']
def create_file(self, parent_id, name, modified_datetime, file_local_path):
    """
    :param parent_id: The id of the new file's parent folder.
    :param name: The name to give the file on the remote server.
    :param modified_datetime: Modified time.
    :param file_local_path:
    :return: The id of the newly created file.
    """
    if os.stat(file_local_path).st_size == 0:
        return self.create_empty_file(parent_id, name, modified_datetime)

    retries = 0
    retry_sleep = 0.5
    while retries < 10:
        # Start a resumable upload session
        r = self._do_request(
            'post',
            http_server_utils.join_url_components(
                [self._api_upload_endpoint_prefix, 'files']),
            params={'uploadType': 'resumable', 'fields': 'id'},
            json={
                'name': name,
                'modifiedTime': convert_dt_to_google_string(modified_datetime),
                'parents': [parent_id]
            },
            error_500_retries=5)
        session_url = r.headers['Location']

        # _upload_file_data returns the new file id (str) on success, or
        # the number of seconds to sleep before retrying.
        res = self._upload_file_data(session_url, file_local_path,
                                     previous_retry_secs=retry_sleep)
        if isinstance(res, str):
            self._verfiy_upload(file_local_path, res)
            return res

        retry_sleep = res
        time.sleep(res)
        retries += 1

    raise ConnectionError('Too many retries for file upload')