def _delete_remote_resource(self, resource: str) -> str:
    """Delete a resource on the remote side and return its name.

    Dispatches to the delete function matching self.remote_key
    ('export' or 'import'), and rotates credentials on self when the
    response carries refreshed tokens.
    """
    dispatch = {
        'export': export_delete,
        'import': import_delete,
    }
    delete = dispatch[self.remote_key]
    debug_step(f'deleting: {resource}')
    resp = delete(
        self.env,
        self.pnum,
        self.token,
        resource,
        session=self.session,
        group=self.group,
        api_key=self.api_key,
        refresh_token=self.refresh_token,
        refresh_target=self.refresh_target,
    )
    tokens = resp.get('tokens')
    if tokens:
        # keep credentials fresh for subsequent API calls
        self.token = tokens.get('access_token')
        self.refresh_token = tokens.get('refresh_token')
        self.refresh_target = get_claims(self.token).get('exp')
    return resource
def _complete_resumable(
    env: str,
    pnum: str,
    filename: str,
    token: str,
    url: str,
    bar: Bar,
    session: Any = requests,
    mtime: Optional[str] = None,
    api_key: Optional[str] = None,
    refresh_token: Optional[str] = None,
    refresh_target: Optional[int] = None,
) -> dict:
    """Finalise a resumable upload with a PATCH request.

    Refreshes the access token if needed, optionally asks the server
    to set the file's Modified-Time, finishes the progress bar, and
    returns the decoded response together with any refreshed tokens.
    """
    tokens = maybe_refresh(env, pnum, api_key, token, refresh_token, refresh_target)
    if tokens:
        token = tokens.get("access_token")
    request_headers = {'Authorization': f'Bearer {token}'}
    if mtime:
        request_headers['Modified-Time'] = mtime
    debug_step('completing resumable')
    resp = session.patch(url, headers=request_headers)
    resp.raise_for_status()
    bar.finish()
    debug_step('finished')
    return {'response': json.loads(resp.text), 'tokens': tokens}
def _parse_ignore_data(self, patterns: str) -> list:
    """Split a comma-separated pattern string (e.g. '.git,build,dist')
    into a list, stripping spaces; an empty value yields []."""
    if not patterns:
        return []
    debug_step(f'ignoring patterns: {patterns}')
    return patterns.replace(' ', '').split(',')
def _find_local_resources(self, path) -> list:
    """Walk `path` recursively and collect files to transfer.

    Folders whose path (relative to `path`) starts with one of
    self.ignore_prefixes are skipped entirely, as are files ending in
    one of self.ignore_suffixes. When self.sync_mtime is set, each
    entry carries the file's mtime as an integrity reference.

    Returns a list of (path, integrity_reference) tuples.
    """
    found = []
    debug_step('finding local resources to transfer')
    for current_dir, _, filenames in os.walk(path):
        relative = current_dir.replace(f'{path}/', '')
        if any(relative.startswith(p) for p in self.ignore_prefixes):
            continue
        for name in filenames:
            if any(name.endswith(s) for s in self.ignore_suffixes):
                continue
            full_path = f'{current_dir}/{name}'
            reference = (
                str(os.stat(full_path).st_mtime) if self.sync_mtime else None
            )
            found.append((full_path, reference))
    return found
def _transfer_remote_to_local(self, resource, integrity_reference=None) -> str:
    """Download `resource` from the remote location.

    Creates the local parent directory if needed, and resumes from
    local data when the integrity reference still matches the first
    downloaded portion.
    """
    local_dir = os.path.dirname(resource)
    if not os.path.lexists(local_dir):
        debug_step(f'creating directory: {local_dir}')
        os.makedirs(local_dir)
    export_get(
        self.env,
        self.pnum,
        resource,
        self.token,
        session=self.session,
        etag=integrity_reference,
        no_print_id=True,
        set_mtime=self.sync_mtime,
        backend=self.remote_key,
    )
    return resource
def session_update(
    env: str,
    pnum: str,
    token_type: str,
    token: str,
    refresh_token: Optional[str] = None,
) -> None:
    """Persist a token (and optional refresh token) in the session store.

    Reads the YAML session store, updates the entry for
    (env, pnum, token_type), and writes the store back. A missing or
    empty store file is initialised with the default layout.
    """
    default = {
        'prod': {},
        'alt': {},
        'test': {},
        'ec-prod': {},
        'ec-test': {},
        'dev': {},
    }
    if not session_file_exists():
        debug_step('creating new tacl session store')
    data = default
    try:
        with open(SESSION_STORE, 'r') as f:
            # bug fix: yaml.load returns None for an empty/blank file,
            # which would crash on data.get below — fall back to default
            data = yaml.load(f, Loader=yaml.Loader) or default
    except FileNotFoundError:
        data = default
    target = data.get(env, {}).get(pnum, {})
    target[token_type] = token
    target[f'{token_type}_refresh'] = refresh_token
    if not data.get(env):
        data[env] = {}
    data[env][pnum] = target
    debug_step('updating session')
    with open(SESSION_STORE, 'w') as f:
        f.write(yaml.dump(data, Dumper=yaml.Dumper))
def nacl_get_server_public_key(env: str, pnum: str, token: str) -> bytes:
    """Fetch the server-side public encryption key for a project.

    Raises AuthzError on any non-200 response.
    """
    host = HOSTS.get(env)
    url = f'https://{host}/v1/{pnum}/files/crypto/key'
    debug_step('getting public key')
    resp = requests.get(url, headers={'Authorization': f'Bearer {token}'})
    if resp.status_code != 200:
        raise AuthzError
    key_b64 = json.loads(resp.text).get('public_key')
    return libnacl.public.PublicKey(base64.b64decode(key_b64))
def check_api_connection(env: str) -> None:
    """Exit with a help message when the API for `env` is unreachable.

    Skipped for the dev environment and when an HTTPS proxy is set
    (connectivity cannot be probed directly through a proxy).
    """
    if env == "dev":
        return
    if os.getenv("HTTPS_PROXY"):
        debug_step('skipping connection test as a proxy is set')
        return
    if has_api_connectivity(hostname=API_ENVS[env]):
        return
    sys.exit(
        dedent(f'''\
        The API environment hosted at {ENV[env]} is not accessible from your current network connection. Please contact TSD for help: {HELP_URL}'''))
def _transfer_local_to_remote(
    self,
    resource: str,
    integrity_reference: Optional[str] = None,
) -> str:
    """Upload `resource` to the remote destination.

    Files larger than self.chunk_threshold go through the resumable
    protocol; smaller files are streamed in a single request. Session
    and token state returned by the transfer are stored on self.
    """
    if not os.path.lexists(resource):
        print(f'WARNING: could not find {resource} on local disk')
        return resource
    # keyword arguments common to both transfer functions
    shared_kwargs = dict(
        group=self.group,
        is_dir=True,
        session=self.session,
        set_mtime=self.sync_mtime,
        public_key=self.public_key,
        api_key=self.api_key,
        refresh_token=self.refresh_token,
        refresh_target=self.refresh_target,
    )
    if os.stat(resource).st_size > self.chunk_threshold:
        resp = initiate_resumable(
            self.env,
            self.pnum,
            resource,
            self.token,
            chunksize=self.chunk_size,
            verify=True,
            **shared_kwargs,
        )
    else:
        resp = streamfile(
            self.env,
            self.pnum,
            resource,
            self.token,
            **shared_kwargs,
        )
    if resp.get("session"):
        debug_step("renewing session")
        self.session = resp.get("session")
    tokens = resp.get('tokens')
    if tokens:
        self.token = tokens.get('access_token')
        self.refresh_token = tokens.get('refresh_token')
        self.refresh_target = get_claims(self.token).get('exp')
    return resource
def upload_resource_name(filename, is_dir, group=None):
    """Build the URL resource name for an upload.

    Single files are quoted directly; directory uploads are prefixed
    with the owning group and keep their (quoted) relative path.
    """
    if is_dir:
        debug_step('uploading directory (file)')
        target = filename[1:] if filename.startswith('/') else filename
        return f'{group}/{quote(target)}'
    debug_step('uploading file')
    return quote(format_filename(filename))
def streamfile(env, pnum, filename, token, chunksize=4096, group=None,
               backend='files', is_dir=False, session=requests, set_mtime=False):
    """
    Idempotent, lazy data upload from files.

    Parameters
    ----------
    env: str, 'test' or 'prod'
    pnum: str, project number
    filename: path to file
    token: JWT
    chunksize: bytes to read per chunk
    group: name of file group which should own upload
    backend: which API backend to send data to
    is_dir: bool, True if uploading a directory of files, will create
        a different URL structure
    session: requests.session, optional
    set_mtime: bool, default False; if True, send the file's
        client-side mtime and ask the server to set it remotely

    Returns
    -------
    requests.response
    """
    resource = upload_resource_name(filename, is_dir, group=group)
    endpoint = f"stream/{resource}?group={group}"
    url = f'{file_api_url(env, pnum, backend, endpoint=endpoint)}'
    headers = {'Authorization': 'Bearer {0}'.format(token)}
    debug_step(f'streaming data to {url}')
    if set_mtime:
        headers['Modified-Time'] = str(os.stat(filename).st_mtime)
    reader = lazy_reader(filename, chunksize, with_progress=True)
    resp = session.put(url, data=reader, headers=headers)
    resp.raise_for_status()
    return resp
def _complete_resumable(filename, token, url, bar, session=requests, mtime=None):
    """Send the final PATCH that completes a resumable upload.

    Optionally asks the server to set the file's Modified-Time,
    finishes the progress bar, and returns the decoded response.
    """
    request_headers = {'Authorization': 'Bearer {0}'.format(token)}
    if mtime:
        request_headers['Modified-Time'] = mtime
    debug_step('completing resumable')
    response = session.patch(url, headers=request_headers)
    response.raise_for_status()
    bar.finish()
    debug_step('finished')
    return json.loads(response.text)
def upload_resource_name(filename: str, is_dir: bool, group: Optional[str] = None) -> str:
    """Build the URL resource name for an upload.

    Files: the quoted, formatted name, prefixed with the group when
    one is given. Directories: the group, then the quoted path with
    any leading '/' removed.
    """
    if is_dir:
        debug_step('uploading directory (file)')
        target = filename[1:] if filename.startswith('/') else filename
        return f'{group}/{quote(target)}'
    debug_step('uploading file')
    name = quote(format_filename(filename))
    return f'{group}/{name}' if group else name
def get_resumable(env, pnum, token, filename=None, upload_id=None, dev_url=None,
                  backend='files', is_dir=False, key=None, session=requests):
    """
    List uploads which can be resumed.

    Parameters
    ----------
    env: str, 'test' or 'prod'
    pnum: str, project number
    token: str, JWT
    filename: str, path
    upload_id: str, uuid identifying a specific upload to resume
    dev_url: str, development URL
    backend: str, API backend
    is_dir: bool, True if uploading a directory of files, will create
        a different URL structure
    key: str, resumable key (directory path)
    session: requests.session, optional

    Returns
    -------
    dict, {filename, chunk_size, max_chunk, id}
    """
    if not dev_url:
        filename = f'/{quote(format_filename(filename))}' if filename else ''
        # bug fix: `filename` was computed but never interpolated into
        # the endpoint, so file-specific resumable lookups hit the wrong URL
        endpoint = f'resumables{filename}'
        url = f'{file_api_url(env, pnum, backend, endpoint=endpoint)}'
    else:
        url = dev_url
    if upload_id:
        url = '{0}?id={1}'.format(url, upload_id)
    elif not upload_id and is_dir and key:
        url = '{0}?key={1}'.format(url, quote(key, safe=''))
    headers = {'Authorization': 'Bearer {0}'.format(token)}
    debug_step(f'fetching resumables info, using: {url}')
    resp = session.get(url, headers=headers)
    data = json.loads(resp.text)
    return data
def _delete_remote_resource(self, resource) -> str:
    """Delete a remote resource via the backend matching
    self.remote_key ('export' or 'import'); return the name."""
    dispatch = {
        'export': export_delete,
        'import': import_delete,
    }
    debug_step(f'deleting: {resource}')
    dispatch[self.remote_key](
        self.env,
        self.pnum,
        self.token,
        resource,
        session=self.session,
        group=self.group,
    )
    return resource
def session_expires_soon(env: str, pnum: str, token_type: str, minutes: int = 10) -> bool:
    """Report whether the stored session token expires within `minutes`.

    Returns False when there is no session file or no stored token —
    there is nothing that can expire soon.
    """
    if not session_file_exists():
        # bug fix: previously returned None despite the bool annotation
        return False
    token = session_token(env, pnum, token_type)
    if not token:
        return False
    # window [now, now + minutes] expressed as unix timestamps
    target_time = datetime.utcnow() + timedelta(minutes=minutes)
    upper = int(time.mktime(target_time.timetuple()))
    lower = int(time.time())
    if check_if_exp_is_within_range(token, lower=lower, upper=upper):
        debug_step(f'session will expire in the next {minutes} minutes')
        return True
    debug_step('session will not expire soon')
    return False
def _find_local_resources(self, path: str) -> list:
    """Recursively list `path`, honouring ignore prefixes/suffixes.

    If self.target_dir is set, it is prepended to `path` before
    walking and stripped from every result, since the target
    directory does not exist remotely. Returns a list of
    (path, integrity_reference) tuples; the reference is the file
    mtime when self.sync_mtime is set.
    """
    if self.target_dir:
        path = os.path.normpath(f'{self.target_dir}/{path}')
    found = []
    debug_step('finding local resources to transfer')
    for current_dir, _, filenames in os.walk(path):
        if sys.platform == 'win32':
            # normalise separators so prefix matching works everywhere
            current_dir = current_dir.replace("\\", "/")
        relative = current_dir.replace(f'{path}/', '')
        if any(relative.startswith(p) for p in self.ignore_prefixes):
            continue
        for name in filenames:
            if any(name.endswith(s) for s in self.ignore_suffixes):
                continue
            entry = f'{current_dir}/{name}'
            reference = (
                str(os.stat(entry).st_mtime) if self.sync_mtime else None
            )
            if self.target_dir:
                entry = os.path.normpath(entry.replace(f'{self.target_dir}/', ''))
            found.append((entry, reference))
    return found
def survey_list(
    env,
    pnum,
    token,
    backend=None,
    session=requests,
    directory=None,
    page=None,
    group=None,
):
    """List attachments for a survey form (`directory` is the form id).

    Returns the decoded listing, or an empty listing on 404.
    """
    endpoint = f"{directory}/attachments"
    url = f'{file_api_url(env, pnum, backend, endpoint=endpoint, page=page)}'
    request_headers = {'Authorization': 'Bearer {0}'.format(token)}
    debug_step(f'listing resources at {url}')
    response = session.get(url, headers=request_headers)
    if response.status_code == 404:
        return {'files': [], 'page': None}
    response.raise_for_status()
    return json.loads(response.text)
def get_resumable(
    env: str,
    pnum: str,
    token: str,
    filename: Optional[str] = None,
    upload_id: Optional[str] = None,
    dev_url: Optional[str] = None,
    backend: str = 'files',
    is_dir: bool = False,
    key: Optional[str] = None,
    session: Any = requests,
    api_key: Optional[str] = None,
    refresh_token: Optional[str] = None,
    refresh_target: Optional[int] = None,
) -> dict:
    """
    List uploads which can be resumed, refreshing the access token
    when needed.

    Parameters
    ----------
    env: API environment
    pnum: project number
    token: JWT
    filename: path of a specific upload to look up
    upload_id: uuid identifying a specific upload to resume
    dev_url: complete URL (useful for development)
    backend: API backend
    is_dir: True if uploading a directory of files
    key: resumable key (directory path)
    session: requests.session
    api_key: client API key, used for token refresh
    refresh_token: token used to refresh the access token
    refresh_target: expiry timestamp of the current access token

    Returns
    -------
    dict, {overview: {filename, chunk_size, max_chunk, id}, tokens: {}}
    """
    if not dev_url:
        filename = f'/{quote(format_filename(filename))}' if filename else ''
        # bug fix: `filename` was computed but never interpolated into
        # the endpoint, so file-specific resumable lookups hit the wrong URL
        endpoint = f'resumables{filename}'
        url = f'{file_api_url(env, pnum, backend, endpoint=endpoint)}'
    else:
        url = dev_url
    if upload_id:
        url = '{0}?id={1}'.format(url, upload_id)
    elif not upload_id and is_dir and key:
        url = '{0}?key={1}'.format(url, quote(key, safe=''))
    debug_step(f'fetching resumables info, using: {url}')
    tokens = maybe_refresh(env, pnum, api_key, token, refresh_token, refresh_target)
    token = tokens.get("access_token") if tokens else token
    headers = {'Authorization': f'Bearer {token}'}
    resp = session.get(url, headers=headers)
    data = json.loads(resp.text)
    return {'overview': data, 'tokens': tokens}
def delete_resumable(
    env: str,
    pnum: str,
    token: str,
    filename: str,
    upload_id: str,
    dev_url: Optional[str] = None,
    backend: str = 'files',
    session: Any = requests,
) -> dict:
    """
    Delete a specific incomplete resumable.

    Parameters
    ----------
    env: 'test' or 'prod'
    pnum: project number
    token: JWT
    filename: filename
    upload_id: uuid
    dev_url: pass a complete url (useful for development)
    backend: API backend
    session: requests.session, optional

    Returns
    -------
    dict
    """
    if dev_url:
        url = dev_url
    else:
        filename = f'/{quote(format_filename(filename))}' if filename else ''
        # bug fix: `filename` was computed but never interpolated into
        # the endpoint, so the wrong resumable URL was targeted
        endpoint = f'resumables{filename}?id={upload_id}'
        url = f'{file_api_url(env, pnum, backend, endpoint=endpoint)}'
    debug_step(f'deleting resumable {upload_id} using: {url}')
    resp = session.delete(
        url, headers={'Authorization': 'Bearer {0}'.format(token)})
    resp.raise_for_status()
    print('Upload: {0}, for filename: {1} deleted'.format(upload_id, filename))
    return json.loads(resp.text)
def lazy_reader(filename, chunksize, previous_offset=None, next_offset=None,
                verify=None, server_chunk_md5=None, with_progress=False):
    """Yield chunks of `filename` lazily, optionally resuming an upload.

    When `verify` is set, the chunk between previous_offset and
    next_offset is re-read and its md5 compared against
    server_chunk_md5 before resuming; a mismatch aborts the resume.
    When next_offset is set, reading starts there. with_progress
    drives a progress bar alongside the reads.
    """
    debug_step(f'reading file: {filename}')
    with open(filename, 'rb') as f:
        if verify:
            debug_step('verifying chunk md5sum')
            f.seek(previous_offset)
            last_chunk_size = next_offset - previous_offset
            last_chunk_data = f.read(last_chunk_size)
            md5 = hashlib.md5(last_chunk_data)
            # bug fix: explicit comparison instead of `assert`, which
            # is silently stripped when python runs with -O
            if md5.hexdigest() != server_chunk_md5:
                raise Exception(
                    'cannot resume upload - client/server chunks do not match')
        if next_offset:
            f.seek(next_offset)
        if with_progress:
            bar = _init_progress_bar(1, chunksize, filename)
        while True:
            debug_step('reading chunk')
            if with_progress:
                try:
                    bar.next()
                except ZeroDivisionError:
                    pass
            data = f.read(chunksize)
            if not data:
                debug_step('no more data to read')
                if with_progress:
                    bar.finish()
                break
            debug_step('chunk read complete')
            yield data
def _transfer_remote_to_local(
    self,
    resource: str,
    integrity_reference: Optional[str] = None,
) -> str:
    """Download `resource` from the remote location.

    Creates the local target directory if needed (prefixed with
    self.target_dir when set), resumes a partial download when the
    integrity reference still matches the first downloaded portion,
    and rotates credentials on self when refreshed tokens come back.
    """
    target = os.path.dirname(resource)
    if self.target_dir:
        target = os.path.normpath(f'{self.target_dir}/{target}')
    if not os.path.lexists(target):
        debug_step(f'creating directory: {target}')
        os.makedirs(target)
    resp = export_get(
        self.env,
        self.pnum,
        resource,
        self.token,
        session=self.session,
        etag=integrity_reference,
        no_print_id=True,
        set_mtime=self.sync_mtime,
        backend=self.remote_key,
        target_dir=self.target_dir,
        api_key=self.api_key,
        refresh_token=self.refresh_token,
        refresh_target=self.refresh_target,
        # bug fix: was the bare, undefined name `public_key` (NameError)
        public_key=self.public_key,
    )
    tokens = resp.get('tokens')
    if tokens:
        self.token = tokens.get('access_token')
        self.refresh_token = tokens.get('refresh_token')
        self.refresh_target = get_claims(self.token).get('exp')
    return resource
def export_list(env, pnum, token, backend='files', session=requests,
                directory=None, page=None, group=None):
    """
    Get the list of files available for export.

    Parameters
    ----------
    env: str, 'test' or 'prod'
    pnum: str, project number
    token: JWT
    backend: str, API backend
    session: requests.session, optional
    directory: str, name, optional
    page: str (url) next page to list
    group: irrelevant for exports (present for compatibility
        with import_list signature)

    Returns
    -------
    dict
    """
    subdir = f'/{directory}' if directory else ''
    endpoint = f'export{subdir}'
    url = f'{file_api_url(env, pnum, backend, endpoint=endpoint, page=page)}'
    request_headers = {'Authorization': 'Bearer {0}'.format(token)}
    debug_step(f'listing resources at {url}')
    response = session.get(url, headers=request_headers)
    if response.status_code == 404:
        return {'files': [], 'page': None}
    response.raise_for_status()
    return json.loads(response.text)
def import_list(
    env: str,
    pnum: str,
    token: str,
    backend: str = 'files',
    session: Any = requests,
    directory: Optional[str] = None,
    page: Optional[str] = None,
    group: Optional[str] = None,
    per_page: Optional[int] = None,
) -> dict:
    """
    Get the list of files in the import directory, for a given group.

    Parameters
    ----------
    env: 'test', 'prod', or 'alt'
    pnum: project number
    token: JWT
    backend: API backend
    session: requests.session
    directory: name
    page: (url) next page to list
    group: group owner of the upload
    per_page: number of files to list per page
    """
    subdir = f'/{directory}' if directory else ''
    endpoint = f"stream/{group}{subdir}"
    url = f'{file_api_url(env, pnum, backend, endpoint=endpoint, page=page, per_page=per_page)}'
    request_headers = {'Authorization': 'Bearer {0}'.format(token)}
    debug_step(f'listing resources at {url}')
    response = session.get(url, headers=request_headers)
    if response.status_code == 404:
        return {'files': [], 'page': None}
    response.raise_for_status()
    return json.loads(response.text)
def import_list(env, pnum, token, backend='files', session=requests,
                directory=None, page=None, group=None):
    """
    Get the list of files in the import directory, for a given group.

    Parameters
    ----------
    env: str, 'test' or 'prod'
    pnum: str, project number
    token: JWT
    backend: str, API backend
    session: requests.session, optional
    directory: str, name, optional
    page: str (url) next page to list
    group: group owner of the upload

    Returns
    -------
    dict
    """
    subdir = f'/{directory}' if directory else ''
    endpoint = f"stream/{group}{subdir}"
    url = f'{file_api_url(env, pnum, backend, endpoint=endpoint, page=page)}'
    request_headers = {'Authorization': 'Bearer {0}'.format(token)}
    debug_step(f'listing resources at {url}')
    response = session.get(url, headers=request_headers)
    if response.status_code == 404:
        return {'files': [], 'page': None}
    response.raise_for_status()
    return json.loads(response.text)
def export_list(
    env: str,
    pnum: str,
    token: str,
    backend: str = 'files',
    session: Any = requests,
    directory: Optional[str] = None,
    page: Optional[str] = None,
    group: Optional[str] = None,
    per_page: Optional[int] = None,
) -> dict:
    """
    Get the list of files available for export.

    Parameters
    ----------
    env: 'test' or 'prod', or 'alt'
    pnum: project number
    token: JWT
    backend: API backend
    session: requests.session
    directory: name
    page: url, next page to list
    group: irrelevant for exports (present for compatibility
        with import_list signature)
    per_page: number of files to list per page
    """
    subdir = f'/{directory}' if directory else ''
    endpoint = f'export{subdir}'
    url = f'{file_api_url(env, pnum, backend, endpoint=endpoint, page=page, per_page=per_page)}'
    request_headers = {'Authorization': 'Bearer {0}'.format(token)}
    debug_step(f'listing resources at {url}')
    response = session.get(url, headers=request_headers)
    if response.status_code == 404:
        return {'files': [], 'page': None}
    response.raise_for_status()
    return json.loads(response.text)
def refresh_access_token(
    env: str,
    pnum: str,
    api_key: str,
    refresh_token: str,
) -> tuple:
    """Exchange a refresh token for a new (access, refresh) token pair.

    Returns (None, None) when the API rejects the refresh; raises
    AuthnError when the HTTP request itself fails.
    """
    url = f'{auth_api_url(env, pnum, auth_method="refresh")}'
    payload = json.dumps({'refresh_token': refresh_token})
    request_headers = {
        'Content-Type': 'application/json',
        'Authorization': f'Bearer {api_key}',
    }
    try:
        debug_step('refreshing token')
        resp = requests.post(url, data=payload, headers=request_headers)
    except Exception as e:
        raise AuthnError from e
    if resp.status_code not in [200, 201]:
        debug_step(json.loads(resp.text).get('message'))
        return None, None
    body = json.loads(resp.text)
    return body.get('token'), body.get('refresh_token')
def survey_list(
    env: str,
    pnum: str,
    token: str,
    backend: str = 'survey',
    session: Any = requests,
    directory: Optional[str] = None,
    page: Optional[str] = None,
    group: Optional[str] = None,
    per_page: Optional[int] = None,
) -> dict:
    """
    Get the list of attachments in the survey API.

    Parameters
    ----------
    env: 'test', 'prod', or 'alt'
    pnum: project number
    token: JWT
    backend: API backend: survey
    session: requests.session
    directory: form id
    page: (url) next page to list
    group: group owner - not relevant here
    per_page: number of files to list per page
    """
    endpoint = f"{directory}/attachments"
    url = f'{file_api_url(env, pnum, backend, endpoint=endpoint, page=page, per_page=per_page)}'
    request_headers = {'Authorization': 'Bearer {0}'.format(token)}
    debug_step(f'listing resources at {url}')
    response = session.get(url, headers=request_headers)
    if response.status_code == 404:
        return {'files': [], 'page': None}
    response.raise_for_status()
    return json.loads(response.text)
def session_is_expired(env: str, pnum: str, token_type: str) -> bool:
    """Report whether the stored session token is missing or expired."""
    if not session_file_exists():
        return True
    token = session_token(env, pnum, token_type)
    # NOTE(review): this writes the raw JWT to debug output — consider
    # redacting it, since debug logs may be shared in support requests
    debug_step(f'found token: {token}')
    if not token:
        return True
    if check_if_key_has_expired(token):
        debug_step('session expired')
        return True
    debug_step('session has not expired')
    return False
def sync(self) -> bool:
    """Synchronise self.directory.

    Uses _find_resources_to_handle, _transfer, and _delete, optionally
    resuming pending work from the on-disk caches and clearing cache
    entries as transfers and deletions complete.
    """
    pending_transfers = []
    pending_deletes = []
    # 1. check caches
    if self.use_cache:
        debug_step('reading from cache')
        cached_transfers = self.transfer_cache.read(key=self.directory)
        if cached_transfers:
            click.echo('resuming directory transfer from cache')
            pending_transfers = cached_transfers
        cached_deletes = self.delete_cache.read(key=self.directory)
        if cached_deletes:
            click.echo('resuming deletion from cache')
            pending_deletes = cached_deletes
    # 2. maybe find resources, maybe fill caches
    if not pending_transfers or not self.use_cache:
        pending_transfers, pending_deletes = self._find_resources_to_handle(self.directory)
        if self.use_cache:
            self.transfer_cache.add_many(key=self.directory, items=pending_transfers)
            self.delete_cache.add_many(key=self.directory, items=pending_deletes)
    # 3. transfer resources
    for resource, integrity_reference in pending_transfers:
        self._transfer(resource, integrity_reference=integrity_reference)
        if self.use_cache:
            self.transfer_cache.remove(key=self.directory, item=resource)
    debug_step('destroying transfer cache')
    self.transfer_cache.destroy(key=self.directory)
    # 4. maybe delete resources
    for resource, _ in pending_deletes:
        self._delete(resource)
        if self.use_cache:
            self.delete_cache.remove(key=self.directory, item=resource)
    debug_step('destroying delete cache')
    self.delete_cache.destroy(key=self.directory)
    return True