Example #1
def _complete_resumable(
    env: str,
    pnum: str,
    filename: str,
    token: str,
    url: str,
    bar: Bar,
    session: Any = requests,
    mtime: Optional[str] = None,
    api_key: Optional[str] = None,
    refresh_token: Optional[str] = None,
    refresh_target: Optional[int] = None,
) -> dict:
    tokens = maybe_refresh(env, pnum, api_key, token, refresh_token,
                           refresh_target)
    token = tokens.get("access_token") if tokens else token
    headers = {'Authorization': f'Bearer {token}'}
    if mtime:
        headers['Modified-Time'] = mtime
    debug_step('completing resumable')
    resp = session.patch(url, headers=headers)
    resp.raise_for_status()
    bar.finish()
    debug_step('finished')
    return {'response': json.loads(resp.text), 'tokens': tokens}
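A minimal sketch of how this helper is invoked once all chunks have been sent; url, upload_id, group, token, and bar are assumed to come from the surrounding upload loop (see examples 4 and 5), and the project number is hypothetical:

# hypothetical values; mirrors the final call made in examples 4 and 5
end_url = '{0}?chunk=end&id={1}&group={2}'.format(url, upload_id, group)
result = _complete_resumable('test', 'p11', 'data.csv', token, end_url, bar)
print(result['response'])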
Example #2
def get_resumable(
    env: str,
    pnum: str,
    token: str,
    filename: Optional[str] = None,
    upload_id: Optional[str] = None,
    dev_url: Optional[str] = None,
    backend: str = 'files',
    is_dir: bool = False,
    key: Optional[str] = None,
    session: Any = requests,
    api_key: Optional[str] = None,
    refresh_token: Optional[str] = None,
    refresh_target: Optional[int] = None,
) -> dict:
    """
    List uploads which can be resumed.

    Returns
    -------
    dict, {overview: {filename, chunk_size, max_chunk, id}, tokens: {}}

    """
    if not dev_url:
        filename = f'/{quote(format_filename(filename))}' if filename else ''
        endpoint = f'resumables{filename}'
        url = f'{file_api_url(env, pnum, backend, endpoint=endpoint)}'
    else:
        url = dev_url
    if upload_id:
        url = '{0}?id={1}'.format(url, upload_id)
    elif is_dir and key:
        url = '{0}?key={1}'.format(url, quote(key, safe=''))
    debug_step(f'fetching resumables info, using: {url}')
    tokens = maybe_refresh(env, pnum, api_key, token, refresh_token,
                           refresh_target)
    token = tokens.get("access_token") if tokens else token
    headers = {'Authorization': f'Bearer {token}'}
    resp = session.get(url, headers=headers)
    data = json.loads(resp.text)
    return {'overview': data, 'tokens': tokens}
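A minimal usage sketch, with a hypothetical project number and filename; token is assumed to be a valid access token:

# list every resumable upload in the project
uploads = get_resumable('test', 'p11', token)['overview']
# or look up a single interrupted upload by its filename
info = get_resumable('test', 'p11', token, filename='big.file')['overview']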
Example #3
def export_delete(
    env: str,
    pnum: str,
    token: str,
    filename: str,
    session: Any = requests,
    group: Optional[str] = None,
    api_key: Optional[str] = None,
    refresh_token: Optional[str] = None,
    refresh_target: Optional[int] = None,
) -> dict:
    tokens = maybe_refresh(env, pnum, api_key, token, refresh_token,
                           refresh_target)
    token = tokens.get("access_token") if tokens else token
    endpoint = f'export/{filename}'
    url = f'{file_api_url(env, pnum, "files", endpoint=endpoint)}'
    headers = {'Authorization': f'Bearer {token}'}
    print(f'deleting: {filename}')
    resp = session.delete(url, headers=headers)
    resp.raise_for_status()
    return {'response': resp, 'tokens': tokens}
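A minimal usage sketch (project number and filename are hypothetical):

# remove a file from the project's export area
result = export_delete('test', 'p11', token, 'results.csv')
# result['response'] is the requests.Response; result['tokens'] holds any refreshed tokens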
Example #4
def _continue_resumable(
    env: str,
    pnum: str,
    filename: str,
    token: str,
    to_resume: dict,
    group: Optional[str] = None,
    verify: bool = False,
    dev_url: Optional[str] = None,
    backend: str = 'files',
    is_dir: bool = False,
    session: Any = requests,
    set_mtime: bool = False,
    public_key: Optional[libnacl.public.PublicKey] = None,
    api_key: Optional[str] = None,
    refresh_token: Optional[str] = None,
    refresh_target: Optional[int] = None,
) -> dict:
    """
    Continue a resumable upload, reading the file from the
    appropriate byte offset, chunk by chunk, and performing
    a PATCH request per chunk. Optionally verifies the chunk
    md5 before resuming.

    """
    tokens = {}
    url = _resumable_url(env,
                         pnum,
                         filename,
                         dev_url,
                         backend,
                         is_dir,
                         group=group)
    headers = {'Authorization': f'Bearer {token}'}
    current_mtime = os.stat(filename).st_mtime if set_mtime else None
    if set_mtime:
        headers['Modified-Time'] = str(current_mtime)
    max_chunk = to_resume['max_chunk']
    chunksize = to_resume['chunk_size']
    previous_offset = to_resume['previous_offset']
    next_offset = to_resume['next_offset']
    upload_id = to_resume['id']
    server_chunk_md5 = str(to_resume['md5sum'])
    chunk_num = max_chunk + 1
    print(f'Resuming upload with id: {upload_id}')
    bar = _init_progress_bar(chunk_num, chunksize, filename)
    for chunk, enc_nonce, enc_key, ch_size in lazy_reader(
            filename,
            chunksize,
            previous_offset,
            next_offset,
            verify,
            server_chunk_md5,
            public_key=public_key,
    ):
        tokens = maybe_refresh(env, pnum, api_key, token, refresh_token,
                               refresh_target)
        if tokens:
            token = tokens.get("access_token")
            refresh_token = tokens.get("refresh_token")
            refresh_target = get_claims(token).get('exp')
            headers['Authorization'] = f'Bearer {token}'
        if public_key:
            headers['Content-Type'] = 'application/octet-stream+nacl'
            headers['Nacl-Nonce'] = nacl_encode_header(enc_nonce)
            headers['Nacl-Key'] = nacl_encode_header(enc_key)
            headers['Nacl-Chunksize'] = str(ch_size)
        parameterised_url = '{0}?chunk={1}&id={2}'.format(
            url, str(chunk_num), upload_id)
        debug_step(f'sending chunk {chunk_num}, using {parameterised_url}')
        with Retry(session.patch, parameterised_url, headers,
                   chunk) as retriable:
            if retriable.get("new_session"):
                session = retriable.get("new_session")
            resp = retriable.get("resp")
            resp.raise_for_status()
            data = json.loads(resp.text)
        bar.next()
        upload_id = data['id']
        chunk_num = data.get("max_chunk") + 1
    if not group:
        group = '{0}-member-group'.format(pnum)
    parameterised_url = '{0}?chunk={1}&id={2}&group={3}'.format(
        url, 'end', upload_id, group)
    resp = _complete_resumable(
        env,
        pnum,
        filename,
        token,
        parameterised_url,
        bar,
        session=session,
        mtime=str(current_mtime),
        api_key=api_key,
        refresh_token=refresh_token,
        refresh_target=refresh_target,
    )
    if not tokens:
        tokens = resp.get('tokens')
    return {
        'response': resp.get('response'),
        'tokens': tokens,
        'session': session
    }
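A minimal sketch of resuming an interrupted upload, assuming the overview entry returned by get_resumable carries the fields this function reads from to_resume (id, max_chunk, chunk_size, previous_offset, next_offset, md5sum); the project number and filename are hypothetical:

# fetch the server-side state for the interrupted upload, then resume it
to_resume = get_resumable('test', 'p11', token, filename='big.file')['overview']
result = _continue_resumable('test', 'p11', 'big.file', token, to_resume)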
Example #5
def _start_resumable(
    env: str,
    pnum: str,
    filename: str,
    token: str,
    chunksize: int,
    group: Optional[str] = None,
    dev_url: Optional[str] = None,
    stop_at: Optional[int] = None,
    backend: str = 'files',
    is_dir: bool = False,
    session: Any = requests,
    set_mtime: bool = False,
    public_key: Optional[libnacl.public.PublicKey] = None,
    api_key: Optional[str] = None,
    refresh_token: Optional[str] = None,
    refresh_target: Optional[int] = None,
) -> dict:
    """
    Start a new resumable upload, reading the file chunk by chunk
    and performing a PATCH request per chunk.

    """
    url = _resumable_url(env,
                         pnum,
                         filename,
                         dev_url,
                         backend,
                         is_dir,
                         group=group)
    headers = {'Authorization': f'Bearer {token}'}
    current_mtime = os.stat(filename).st_mtime if set_mtime else None
    if set_mtime:
        headers['Modified-Time'] = str(current_mtime)
    tokens = {}  # defined up front, as in _continue_resumable, so the post-loop check cannot hit an unbound name
    chunk_num = 1
    for chunk, enc_nonce, enc_key, ch_size in lazy_reader(
            filename, chunksize, public_key=public_key):
        tokens = maybe_refresh(env, pnum, api_key, token, refresh_token,
                               refresh_target)
        if tokens:
            token = tokens.get("access_token")
            refresh_token = tokens.get("refresh_token")
            refresh_target = get_claims(token).get('exp')
            headers['Authorization'] = f'Bearer {token}'
        if public_key:
            headers['Content-Type'] = 'application/octet-stream+nacl'
            headers['Nacl-Nonce'] = nacl_encode_header(enc_nonce)
            headers['Nacl-Key'] = nacl_encode_header(enc_key)
            headers['Nacl-Chunksize'] = str(ch_size)
        if chunk_num == 1:
            parameterised_url = '{0}?chunk={1}'.format(url, str(chunk_num))
        else:
            parameterised_url = '{0}?chunk={1}&id={2}'.format(
                url, str(chunk_num), upload_id)
        debug_step(f'sending chunk {chunk_num}, using {parameterised_url}')
        with Retry(session.patch, parameterised_url, headers,
                   chunk) as retriable:
            if retriable.get("new_session"):
                session = retriable.get("new_session")
            resp = retriable.get("resp")
            resp.raise_for_status()
            data = json.loads(resp.text)
        if chunk_num == 1:
            upload_id = data['id']
            print('Upload id: {0}'.format(upload_id))
            bar = _init_progress_bar(chunk_num, chunksize, filename)
        bar.next()
        if stop_at:
            if chunk_num == stop_at:
                print('stopping at chunk {0}'.format(chunk_num))
                return {'response': data}
        chunk_num = data.get("max_chunk") + 1
    if not group:
        group = '{0}-member-group'.format(pnum)
    parameterised_url = '{0}?chunk={1}&id={2}&group={3}'.format(
        url, 'end', upload_id, group)
    resp = _complete_resumable(
        env,
        pnum,
        filename,
        token,
        parameterised_url,
        bar,
        session=session,
        mtime=str(current_mtime),
        api_key=api_key,
        refresh_token=refresh_token,
        refresh_target=refresh_target,
    )
    if not tokens:
        tokens = resp.get('tokens')
    return {
        'response': resp.get('response'),
        'tokens': tokens,
        'session': session
    }
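A minimal usage sketch (project number, filename, and chunk size are hypothetical):

# begin a fresh resumable upload, sending the file in 50 MB chunks
result = _start_resumable('test', 'p11', 'big.file', token, chunksize=50 * 1024 * 1024)
tokens = result['tokens']  # carry any refreshed credentials forward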
Example #6
def export_get(
    env: str,
    pnum: str,
    filename: str,
    token: str,
    chunksize: int = 4096,
    etag: Optional[str] = None,
    dev_url: Optional[str] = None,
    backend: str = 'files',
    session: Any = requests,
    no_print_id: bool = False,
    set_mtime: bool = False,
    nobar: bool = False,
    target_dir: Optional[str] = None,
    api_key: Optional[str] = None,
    refresh_token: Optional[str] = None,
    refresh_target: Optional[int] = None,
    public_key: Optional[libnacl.public.PublicKey] = None,
) -> dict:
    """
    Download a file to the current directory.

    Parameters
    ----------
    env: 'test', 'prod', or 'alt'
    pnum: project number
    filename: filename to download
    token: JWT
    chunksize: bytes per iteration
    etag: content reference for remote resource
    dev_url: development url
    backend: API backend
    session: requests.session
    no_print_id: suppress printing the download id
    set_mtime: set local file mtime to be the same as remote resource
    nobar: disable the progress bar
    target_dir: where to save the file locally
    api_key: client specific JWT allowing token refresh
    refresh_token: a JWT with which to obtain a new access token
    refresh_target: time around which to refresh (within a default range)
    public_key: encrypt/decrypt data on-the-fly

    """
    tokens = maybe_refresh(env, pnum, api_key, token, refresh_token,
                           refresh_target)
    token = tokens.get("access_token") if tokens else token
    filemode = 'wb'
    current_file_size = None
    headers = {'Authorization': f'Bearer {token}'}
    if etag:
        debug_step(f'download_id: {etag}')
        filemode = 'ab'
        if os.path.lexists(filename):
            current_file_size = os.stat(filename).st_size
            debug_step(f'found {filename} with {current_file_size} bytes')
            headers['Range'] = 'bytes={0}-'.format(current_file_size)
        else:
            debug_step(f'{filename} not found')
            headers['Range'] = 'bytes=0-'
    if dev_url:
        url = dev_url
    else:
        urlpath = '' if backend == 'survey' else 'export/'
        endpoint = f'{urlpath}{filename}'
        # make provision for unsatisfactory semantics
        if backend in ['export', 'files']:
            service = 'files'
        else:
            # fall back to the backend name so service is always bound
            service = backend
        url = f'{file_api_url(env, pnum, service, endpoint=endpoint)}'
    debug_step(f'fetching file info using: {url}')
    resp = session.head(url, headers=headers)
    resp.raise_for_status()
    try:
        download_id = resp.headers['Etag']
        if not no_print_id:
            print('Download id: {0}'.format(download_id))
    except KeyError:
        print(
            'Warning: could not retrieve download id, resumable download will not work'
        )
        download_id = None
    total_file_size = int(resp.headers['Content-Length'])
    if not nobar:
        bar = _init_export_progress_bar(unquote(filename), current_file_size,
                                        total_file_size, chunksize)
    filename = filename if not target_dir else os.path.normpath(
        f'{target_dir}/{filename}')
    destination_dir = os.path.dirname(filename)
    if destination_dir and not os.path.lexists(destination_dir):
        debug_step(f'creating directory: {destination_dir}')
        os.makedirs(destination_dir)
    if public_key:
        debug_step('generating nonce and key')
        nonce = nacl_gen_nonce()
        key = nacl_gen_key()
        enc_nonce = nacl_encrypt_header(public_key, nonce)
        enc_key = nacl_encrypt_header(public_key, key)
        headers['Nacl-Nonce'] = nacl_encode_header(enc_nonce)
        headers['Nacl-Key'] = nacl_encode_header(enc_key)
        headers['Nacl-Chunksize'] = str(chunksize)
    with session.get(url, headers=headers, stream=True) as r:
        r.raise_for_status()
        with open(unquote(filename), filemode) as f:
            for chunk in r.iter_content(chunk_size=chunksize):
                if chunk:
                    if public_key:
                        chunk = nacl_decrypt_data(chunk, nonce, key)
                    f.write(chunk)
                    if not nobar:
                        bar.next()
            if not nobar:
                bar.next()
    if not nobar:
        bar.finish()
    if set_mtime:
        err = 'could not set Modified-Time'
        err_consequence = 'incremental sync will not work for this file'
        try:
            mtime = float(resp.headers.get('Modified-Time'))
            debug_step(f'setting mtime for {filename} to {mtime}')
            os.utime(filename, (mtime, mtime))
        except TypeError:
            print(f'{err}: {filename} - {err_consequence}')
            print(
                'issue most likely due to not getting the correct header from the API'
            )
            print(f'please report the issue: {HELP_URL}')
        except OSError:
            print(f'{err}: {filename} - {err_consequence}')
            print('issue due to local operating system problem')
    return {'filename': filename, 'tokens': tokens}
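A minimal usage sketch (values hypothetical); the second call shows how the printed download id can be passed back as etag to resume a partial download:

# plain download to the current directory
out = export_get('test', 'p11', 'results.csv', token)
# resume a partial download; previous_download_id is the Etag printed by the first call
out = export_get('test', 'p11', 'results.csv', token, etag=previous_download_id)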
Example #7
def streamfile(
    env: str,
    pnum: str,
    filename: str,
    token: str,
    chunksize: int = 4096,
    group: Optional[str] = None,
    backend: str = 'files',
    is_dir: bool = False,
    session: Any = requests,
    set_mtime: bool = False,
    public_key: Optional[libnacl.public.PublicKey] = None,
    api_key: Optional[str] = None,
    refresh_token: Optional[str] = None,
    refresh_target: Optional[int] = None,
) -> dict:
    """
    Idempotent, lazy data upload from files.

    Parameters
    ----------
    env: 'test', 'prod', or 'alt'
    pnum: project number
    filename: path to file
    token: JWT, access token
    chunksize: bytes to read per chunk
    group: name of file group which should own upload
    backend: which API backend to send data to
    is_dir: True if uploading a directory of files,
            will create a different URL structure
    session: e.g. requests.session
    set_mtime: if True send information about the file's client-side mtime,
               asking the server to set it remotely
    public_key: encrypt data on-the-fly (with automatic server-side decryption)
    api_key: client specific JWT allowing token refresh
    refresh_token: a JWT with which to obtain a new access token
    refresh_target: time around which to refresh (within a default range)

    """
    tokens = maybe_refresh(env, pnum, api_key, token, refresh_token,
                           refresh_target)
    token = tokens.get('access_token') if tokens else token
    resource = upload_resource_name(filename, is_dir, group=group)
    endpoint = f"stream/{resource}?group={group}"
    url = f'{file_api_url(env, pnum, backend, endpoint=endpoint)}'
    headers = {'Authorization': f'Bearer {token}'}
    debug_step(f'streaming data to {url}')
    if set_mtime:
        current_mtime = os.stat(filename).st_mtime
        headers['Modified-Time'] = str(current_mtime)
    if public_key:
        nonce, key = nacl_gen_nonce(), nacl_gen_key()
        enc_nonce = nacl_encrypt_header(public_key, nonce)
        enc_key = nacl_encrypt_header(public_key, key)
        headers['Content-Type'] = 'application/octet-stream+nacl'
        headers['Nacl-Nonce'] = nacl_encode_header(enc_nonce)
        headers['Nacl-Key'] = nacl_encode_header(enc_key)
        headers['Nacl-Chunksize'] = str(chunksize)
    else:
        # so the lazy_reader knows to return bytes only
        nonce, key = True, True
    with Retry(
            session.put,
            url,
            headers,
            lazy_reader(
                filename,
                chunksize,
                with_progress=True,
                public_key=public_key,
                nonce=nonce,
                key=key,
            ),
    ) as retriable:
        if retriable.get("new_session"):
            session = retriable.get("new_session")
        resp = retriable.get("resp")
        resp.raise_for_status()
    return {'response': resp, 'tokens': tokens, 'session': session}
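A minimal usage sketch (project number, path, and group are hypothetical; the group name follows the '{pnum}-member-group' pattern used in examples 4 and 5):

# one idempotent PUT of the whole file, owned by the project's member group
result = streamfile('test', 'p11', '/local/path/data.csv', token, group='p11-member-group')
print(result['response'].status_code)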