async def siofu_start(sid, data):
    """Begin a socket.io file upload for session `sid`.

    Opens a fresh temporary filesystem and a binary write handle, records the
    upload state in the socket session under the key ``file_<id>`` and then
    tells the client it may start sending chunks (``siofu_ready``).
    Any failure is logged and reported to the client as ``siofu_error``.
    """
    try:
        tmp_path = generate_uuid()
        safe_name = secure_filepath(data.get('name'))
        upload_id = data.get('id')
        tmp_fs = Filesystem('tmpfs://')
        async with socketio.session(sid) as sess:
            # The entry (including the open handle) is built before the key is
            # formatted, mirroring the evaluation order of a plain assignment.
            upload_state = dict(
                data,
                path=tmp_path,
                name=safe_name,
                bytesLoaded=0,
                tmp_fs=tmp_fs,
                fh=tmp_fs.open(tmp_path, 'wb'),
            )
            sess['file_%d' % upload_id] = upload_state
        ready_payload = {'id': upload_id, 'name': None}
        await socketio.emit('siofu_ready', ready_payload, room=sid)
    except Exception as err:
        logger.error(traceback.format_exc())
        await socketio.emit('siofu_error', str(err), room=sid)
def exists(self, path):
    """Return True when `path` (under this filesystem's prefix) is a file or a symlink.

    Any failure, including an empty `path`, is logged and re-raised as a
    generic ``Exception``.
    """
    try:
        assert path
        target = FS.join(self._prefix, path)
        if os.path.isfile(target):
            return True
        return os.path.islink(target)
    except Exception:
        logger.error(traceback.format_exc())
        raise Exception(f"An error occurred while trying to access {path}")
def organize_file_content(data_fs, tmp_fs, tmp_path):
    """Store a temporary file under ``input/<content hash>`` in `data_fs`.

    The hash is whatever ``sha1sum_io`` computes over the file's bytes. If the
    destination already exists the temporary file is left where it is;
    otherwise it is moved across and made read-only.

    Returns the content hash.
    """
    with tmp_fs.open(tmp_path, 'rb') as stream:
        digest = sha1sum_io(stream)
    destination = Filesystem.join('input', digest)
    if data_fs.exists(destination):
        # Same content was stored before; nothing to move.
        return digest
    Filesystem.mv(
        src_fs=tmp_fs,
        src_path=tmp_path,
        dst_fs=data_fs,
        dst_path=destination,
    )
    data_fs.chmod_ro(destination)
    return digest
def link(self, src, dst):
    """Hard-link `src` to `dst`, both relative to this filesystem's prefix.

    Missing parent directories of `dst` are created first. Any failure
    (including an empty `src`/`dst`) is logged and re-raised as a generic
    ``Exception``.
    """
    try:
        assert src and dst
        dst_full = FS.join(self._prefix, dst)
        os.makedirs(os.path.dirname(dst_full), exist_ok=True)
        src_full = FS.join(self._prefix, src)
        return os.link(src_full, dst_full)
    except Exception:
        logger.error(traceback.format_exc())
        raise Exception(f"An error occurred while trying to link {src} to {dst}")
def serve(app_path, **kwargs):
    """Pre-render the appyter pages, generate production configs and run supervisord.

    Everything is written into a temporary filesystem: ``index.html``,
    ``index.json``, ``landing.html``, ``supervisord.conf`` and ``nginx.conf``.
    ``supervisord`` is then started in the foreground against the generated
    config, and this function exits the interpreter with its return code.

    Args:
        app_path: not referenced in the body; configuration comes from `kwargs`.
        **kwargs: forwarded to ``get_env`` to build the configuration.
    """
    # Imports are local to the function body.
    import os
    import sys
    import time
    import appyter
    import functools
    import logging
    logger = logging.getLogger(__name__)
    from subprocess import Popen
    from appyter.ext.fs import Filesystem
    from appyter.context import get_env, get_jinja2_env, find_blueprints, get_appyter_directory
    from appyter.util import join_routes
    from appyter.profiles.default.filters.url_for import url_for
    config = get_env(**kwargs)
    logger.info(kwargs)
    env = get_jinja2_env(config=config)
    with Filesystem('tmpfs://') as tmp_fs:
        logger.info(f"Working directory {tmp_fs.path()}")
        #
        logger.info(f"Pre-rendering pages...")
        # Parse the notebook template once; it feeds both the form and the JSON.
        with Filesystem(config['CWD']).open(config['IPYNB']) as fr:
            from appyter.parse.nb import nb_from_ipynb_io
            nbtemplate = nb_from_ipynb_io(fr)
        with tmp_fs.open('index.html', 'w') as fw:
            from appyter.render.form import render_form_from_nbtemplate
            fw.write(render_form_from_nbtemplate(env, nbtemplate))
        with tmp_fs.open('index.json', 'w') as fw:
            import json
            from appyter.render.nbinspect import render_nbtemplate_json_from_nbtemplate
            json.dump(render_nbtemplate_json_from_nbtemplate(env, nbtemplate), fw)
        with tmp_fs.open('landing.html', 'w') as fw:
            env.get_template('landing.j2').stream(_nb=os.path.basename(
                config['IPYNB']), ).dump(fw)
        #
        logger.info(f"Generating production config...")
        with tmp_fs.open('supervisord.conf', 'w') as fw:
            env.get_template('production/supervisord.conf.j2').stream(
                _tmp_fs=tmp_fs, sys=sys, str=str).dump(fw)
        with tmp_fs.open('nginx.conf', 'w') as fw:
            # NOTE(review): `s3_to_url` is not imported in this function;
            # presumably it is defined at module level -- confirm.
            env.get_template('production/nginx.conf.j2').stream(
                _tmp_fs=tmp_fs,
                os=os,
                s3_to_url=s3_to_url,
                get_appyter_directory=get_appyter_directory,
                find_blueprints=find_blueprints,
            ).dump(fw)
        logger.info(
            f"Starting production instance at http://{config['HOST']}:{config['PORT']}{config['PREFIX']} ..."
        )
        # `-n` keeps supervisord in the foreground so we can wait on it.
        with Popen(
            ['supervisord', '-n', '-c', tmp_fs.path('supervisord.conf')]) as proc:
            try:
                sys.exit(proc.wait())
            except KeyboardInterrupt:
                # On Ctrl-C, ask supervisord to stop and exit with its code.
                proc.terminate()
                sys.exit(proc.wait())
def static(filename):
    """Serve a static asset, trying three locations in order.

    1. The configured ``STATIC_DIR`` filesystem.
    2. The active profile's bundled ``static`` directory.
    3. The default profile's bundled ``static`` directory.
    """
    static_fs = Filesystem(current_app.config['STATIC_DIR'])
    if static_fs.exists(filename):
        handle = static_fs.open(filename, 'rb')
        return send_file(handle, attachment_filename=filename)
    #
    try:
        profile_dir = get_appyter_directory(
            f"profiles/{current_app.config['PROFILE']}/static"
        )
        return send_from_directory(profile_dir, filename=filename)
    except NotFound:
        default_dir = get_appyter_directory('profiles/default/static')
        return send_from_directory(default_dir, filename=filename)
def rm(self, path, recursive=False):
    """Delete `path` (relative to this filesystem's prefix).

    With ``recursive=True`` a whole directory tree is removed via
    ``shutil.rmtree``; otherwise a single file via ``os.remove``. Any failure
    is logged and re-raised as a generic ``Exception``.
    """
    try:
        assert path
        remover = shutil.rmtree if recursive else os.remove
        return remover(FS.join(self._prefix, path))
    except Exception:
        logger.error(traceback.format_exc())
        raise Exception(f"An error occurred while trying to access {path}")
def open(self, path, mode='r'):
    """Open `path` (relative to this filesystem's prefix) with the given `mode`.

    For write/append modes the parent directory is created on demand.
    A missing file raises ``Exception("No such file or directory: ...")``;
    every other failure is logged and re-raised as a generic ``Exception``.
    """
    try:
        assert path
        needs_parent_dir = mode[0] in {'w', 'a'}
        full_path = FS.join(self._prefix, path)
        if needs_parent_dir:
            os.makedirs(os.path.dirname(full_path), exist_ok=True)
        return open(full_path, mode=mode)
    except FileNotFoundError:
        raise Exception(f"No such file or directory: {path}")
    except Exception:
        logger.error(traceback.format_exc())
        raise Exception(f"An error occurred while trying to access {path}")
def get_fields():
    ''' Return the notebook's field descriptions, cached in the module-level `_fields`.

    The cache is rebuilt when it is empty or whenever the app runs with DEBUG enabled.
    '''
    global _fields
    if _fields and not current_app.config['DEBUG']:
        return _fields
    notebook_fs = Filesystem(current_app.config['CWD'])
    with notebook_fs.open(current_app.config['IPYNB'], 'r') as stream:
        jinja_env = get_jinja2_env(config=current_app.config)
        template = nb_from_ipynb_io(stream)
        _fields = render_nbtemplate_json_from_nbtemplate(jinja_env, template)
    return _fields
def upload_from_request(req, fnames):
    """Persist uploaded files from a request into the data directory.

    Args:
        req: the incoming request; ``req.files.get(name)`` is consulted for
            each entry of `fnames`.
        fnames: iterable of form-field names that may carry a file.

    Returns:
        dict mapping each field name that actually had a file to
        ``"<content hash>/<sanitized filename>"``.
    """
    from flask import current_app
    data_fs = Filesystem(current_app.config['DATA_DIR'])
    data = dict()
    for fname in fnames:
        fh = req.files.get(fname)
        if not fh:
            continue
        filename = secure_filepath(fh.filename)
        path = generate_uuid()
        with Filesystem('tmpfs://') as tmp_fs:
            # Uploads are raw bytes: the destination must be opened in binary
            # mode, otherwise copying the upload stream into it fails (or
            # corrupts the content through text encoding).
            with tmp_fs.open(path, 'wb') as fw:
                fh.save(fw)
            content_hash = organize_file_content(data_fs, tmp_fs, path)
            data[fname] = '/'.join((content_hash, filename))
    return data
def chmod_ro(self, path):
    """Make `path` (relative to this filesystem's prefix) read-only for its owner.

    Any failure, including an empty `path`, is logged and re-raised as a
    generic ``Exception``.
    """
    try:
        assert path
        # The mode must be an octal literal: decimal 400 is 0o620
        # (rw--w----), which is not read-only.
        return os.chmod(FS.join(self._prefix, path), 0o400)
    except Exception:
        logger.error(traceback.format_exc())
        raise Exception(f"An error occurred while trying to access {path}")
def nbconstruct(cwd, ipynb, context, output, **kwargs):
    """Render a notebook from its template and a JSON context.

    Args:
        cwd: location of the filesystem holding the template notebook.
        ipynb: path of the template notebook inside `cwd`.
        context: readable file-like object containing the JSON context.
        output: writable file-like object receiving the rendered notebook.
        **kwargs: extra settings forwarded to ``get_env``.
    """
    context = json.load(context)
    env = get_jinja2_env(
        config=get_env(cwd=cwd, ipynb=ipynb, mode='construct', **kwargs),
        context=context,
    )
    # Use a context manager so the template's file handle is always closed.
    with Filesystem(cwd).open(ipynb, 'r') as fr:
        nbtemplate = nb_from_ipynb_io(fr)
    nb = render_nb_from_nbtemplate(env, nbtemplate)
    nb_to_ipynb_io(nb, output)
def get_index():
    """Serve the notebook in the representation the client asks for.

    Content negotiation picks between the HTML form (default), the JSON field
    description and the raw notebook file; anything else is a 404.
    """
    accepted = [
        'text/html',
        'application/json',
        'application/vnd.jupyter',
    ]
    mimetype = request.accept_mimetypes.best_match(accepted, 'text/html')
    fs = Filesystem(current_app.config['CWD'])
    if mimetype == 'text/html':
        with fs.open(current_app.config['IPYNB'], 'r') as stream:
            jinja_env = get_jinja2_env(config=current_app.config)
            template = nb_from_ipynb_io(stream)
            return render_form_from_nbtemplate(jinja_env, template)
    if mimetype == 'application/json':
        with fs.open(current_app.config['IPYNB'], 'r') as stream:
            jinja_env = get_jinja2_env(config=current_app.config)
            template = nb_from_ipynb_io(stream)
            fields = render_nbtemplate_json_from_nbtemplate(jinja_env, template)
            return jsonify(fields)
    if mimetype == 'application/vnd.jupyter':
        raw_notebook = fs.open(current_app.config['IPYNB'], 'rb')
        return send_file(
            raw_notebook,
            attachment_filename=current_app.config['IPYNB'],
            mimetype=mimetype,
        )
    abort(404)
async def download_with_progress_and_hash(sid, data_fs, name, url, path, filename):
    """Download `url` into temp storage, report progress, then store it by content hash.

    Progress is reported to socket room `sid` with ``download_start``,
    ``download_progress`` (once before the first chunk and after every chunk),
    and finally either ``download_complete`` (carrying
    ``"<content hash>/<filename>"``) or ``download_error``.

    Args:
        sid: socket.io session/room to notify.
        data_fs: filesystem the finished file is organized into.
        name: identifier echoed back in every event.
        url: location to fetch.
        path: path of the temporary file inside the temp filesystem.
        filename: sanitized display filename.
    """
    # TODO: worry about files that are too big/long
    with Filesystem('tmpfs://') as tmp_fs:
        await socketio.emit('download_start', dict(name=name, filename=filename), room=sid)
        try:
            async with aiohttp.ClientSession() as client:
                async with client.get(url) as resp:
                    # NOTE: this may become an issue if ever someone wants actual html
                    # Explicit raise instead of `assert` so the check still
                    # runs under `python -O`; the reported message is unchanged.
                    if resp.content_type == 'text/html':
                        raise ValueError('Expected data, got html')
                    chunk = 0
                    chunk_size = 1024 * 8
                    # Header value as sent by the server, or -1 when unknown.
                    total_size = resp.headers.get('Content-Length', -1)

                    async def reporthook(chunk):
                        await socketio.emit(
                            'download_progress',
                            dict(name=name, chunk=chunk, chunk_size=chunk_size, total_size=total_size),
                            room=sid,
                        )

                    with tmp_fs.open(path, 'wb') as fw:
                        await reporthook(chunk)
                        while buf := await resp.content.read(chunk_size):
                            fw.write(buf)
                            chunk += 1
                            await reporthook(chunk)
        except Exception as e:
            logger.error(f"download error: {traceback.format_exc()}")
            await socketio.emit(
                'download_error',
                dict(name=name, filename=filename, url=url, error=str(e)),
                room=sid,
            )
        else:
            await socketio.emit(
                'download_complete',
                dict(
                    name=name,
                    filename=filename,
                    full_filename='/'.join(
                        (organize_file_content(data_fs, tmp_fs, path), filename)),
                ),
                room=sid,
            )
async def siofu_done(sid, evt):
    """Finish a socket.io file upload.

    Closes the upload's file handle, moves the temp file into content-addressed
    storage, releases the temp filesystem, drops the session entry and notifies
    the client with ``siofu_complete``.
    """
    async with socketio.session(sid) as sess:
        session_key = 'file_%d' % (evt['id'])
        upload_state = sess[session_key]
        upload_state['fh'].close()
        config = sess['config']
        data_fs = Filesystem(config['DATA_DIR'])
        tmp_fs = upload_state['tmp_fs']
        tmp_path = upload_state['path']
        filename = upload_state['name']
        content_hash = organize_file_content(data_fs, tmp_fs, tmp_path)
        tmp_fs.close()
        del sess[session_key]
    #
    full_filename = '/'.join((content_hash, filename))
    completion = dict(id=evt['id'], detail=dict(full_filename=full_filename))
    await socketio.emit('siofu_complete', completion, room=sid)
async def download(sid, data):
    """Queue and run a server-side download requested over socket.io.

    Reads the session config to locate the data directory, sanitizes the
    requested url and filename, emits ``download_queued`` and then delegates
    to ``download_with_progress_and_hash``.
    """
    async with socketio.session(sid) as sess:
        session_config = sess['config']
    #
    target_fs = Filesystem(session_config['DATA_DIR'])
    field_name = data.get('name')
    # TODO: hash based on url?
    # TODO: s3 bypass
    safe_url = secure_url(data.get('url'))
    safe_filename = secure_filepath(data.get('file'))
    queued_payload = dict(name=field_name, filename=safe_filename)
    await socketio.emit('download_queued', queued_payload, room=sid)
    await download_with_progress_and_hash(
        sid=sid,
        data_fs=target_fs,
        name=field_name,
        url=safe_url,
        path=generate_uuid(),
        filename=safe_filename,
    )
async def nbexecute_async(ipynb='', emit=json_emitter_factory(sys.stdout), cwd='', subscribe=None):
    """Execute a notebook cell by cell, streaming progress through `emit`.

    The notebook is loaded from `cwd`, its ``appyter`` metadata is upgraded
    from legacy layouts if needed, and execution is refused (with an ``error``
    message) when the metadata shows it was already started or completed.
    The start time is persisted before execution begins; the completion time
    and the list of files in the working directory are persisted afterwards,
    even if a cell failed.

    Args:
        ipynb: path of the notebook inside `cwd`.
        emit: async callable receiving ``{'type': ..., 'data': ...}`` messages
            (types used here: status, progress, nb, error).
        cwd: location of the working filesystem.
        subscribe: optional async callable; it is given a zero-argument
            function that returns the current notebook JSON plus
            progress/status.

    Raises:
        asyncio.CancelledError: propagated untouched so cancellation works.
    """
    logger.info('nbexecute starting')
    assert callable(emit), 'Emit must be callable'
    with Filesystem(cwd, asynchronous=True) as fs:
        with fs.open(ipynb, 'r') as fr:
            nb = nb_from_ipynb_io(fr)
    #
    if 'appyter' not in nb.metadata:
        logger.warning('detected legacy format, upgrading..')
        nb.metadata['appyter'] = {
            'nbconstruct': {
                'version': 'unknown',
            }
        }
    #
    if 'nbexecute' not in nb.metadata['appyter']:
        nb.metadata['appyter']['nbexecute'] = {
            'version': __version__,
        }
    #
    if 'execution_info' in nb.metadata:
        # Older notebooks kept timing under `execution_info`; fold it in.
        logger.warning('detected legacy format, upgrading..')
        nb.metadata['appyter']['nbexecute'].update(
            version='unknown',
            started=nb.metadata['execution_info'].get('started'),
            completed=nb.metadata['execution_info'].get('completed'),
        )
        del nb.metadata['execution_info']
    #
    if 'completed' in nb.metadata['appyter']['nbexecute']:
        await emit({
            'type': 'error',
            'data': f"Execution already completed at {nb.metadata['appyter']['nbexecute']['completed']}"
        })
        return
    elif 'started' in nb.metadata['appyter']['nbexecute']:
        await emit({
            'type': 'error',
            'data': f"Execution already started at {nb.metadata['appyter']['nbexecute']['started']}"
        })
        return
    #
    await emit({'type': 'status', 'data': 'Starting'})
    #
    try:
        with Filesystem(cwd, with_path=True, asynchronous=True) as fs:
            # setup execution_info with start time
            # Take the current time directly in UTC; stamping a naive local
            # time with tzinfo=UTC mislabels it on non-UTC hosts.
            nb.metadata['appyter']['nbexecute']['started'] = datetime.datetime.now(
                datetime.timezone.utc).isoformat()
            with fs.open(ipynb, 'w') as fw:
                nb_to_ipynb_io(nb, fw)
            #
            logger.info('nbexecute initializing')
            state = dict(progress=0, status='Starting')
            if callable(subscribe):
                await subscribe(lambda: dict(nb=nb_to_json(nb), **state))
            #
            try:
                iopub_hook = iopub_hook_factory(nb, emit)
                client = NotebookClientIOPubHook(
                    nb,
                    allow_errors=True,
                    timeout=None,
                    kernel_name='python3',
                    resources={'metadata': {
                        'path': fs.path()
                    }},
                    iopub_hook=iopub_hook,
                )
                await emit({'type': 'nb', 'data': nb_to_json(nb)})
                async with client.async_setup_kernel(env=dict(
                    PYTHONPATH=':'.join(sys.path),
                    PATH=os.environ['PATH'],
                ), ):
                    logger.info('nbexecute executing')
                    state.update(status='Executing...', progress=0)
                    await emit({'type': 'status', 'data': state['status']})
                    await emit({'type': 'progress', 'data': state['progress']})
                    n_cells = len(nb.cells)
                    exec_count = 1
                    for index, cell in enumerate(nb.cells):
                        logger.info(f"nbexecute executing cell {index}")
                        cell = await client.async_execute_cell(
                            cell,
                            index,
                            execution_count=exec_count,
                        )
                        await iopub_hook(cell, index)
                        if cell_is_code(cell):
                            if cell_has_error(cell):
                                # Stop at the first failing code cell.
                                raise Exception(
                                    'Cell execution error on cell %d' % (exec_count))
                            exec_count += 1
                        if index < n_cells - 1:
                            state['progress'] = index + 1
                            await emit({
                                'type': 'progress',
                                'data': state['progress']
                            })
                        else:
                            state['status'] = 'Success'
                            await emit({
                                'type': 'status',
                                'data': state['status']
                            })
            except asyncio.CancelledError:
                raise
            except Exception as e:
                # Execution errors are reported but do not abort the save below.
                logger.info("nbexecute execution error")
                await emit({'type': 'error', 'data': str(e)})
            # Save execution completion time
            logger.info('nbexecute saving')
            nb.metadata['appyter']['nbexecute']['completed'] = datetime.datetime.now(
                datetime.timezone.utc).isoformat()
            # save additional files
            # TODO: in the future we should individual files and include the original urls here
            nb.metadata['appyter']['nbexecute']['files'] = {
                path: path
                for path in fs.ls()
                if path != ipynb
            }
            #
            with fs.open(ipynb, 'w') as fw:
                nb_to_ipynb_io(nb, fw)
            #
    except asyncio.CancelledError:
        raise
    except Exception:
        await emit({
            'type': 'status',
            'data': 'Error initializing, try again later'
        })
        logger.error(traceback.format_exc())
    finally:
        logger.info('nbexecute complete')
def nbinspect(cwd, ipynb, output, **kwargs):
    """Dump the JSON description of a notebook template's fields to `output`.

    Args:
        cwd: location of the filesystem holding the template notebook.
        ipynb: path of the template notebook inside `cwd`.
        output: writable file-like object receiving the JSON.
        **kwargs: extra settings forwarded to ``get_env``.
    """
    env = get_jinja2_env(
        config=get_env(cwd=cwd, ipynb=ipynb, mode='inspect', **kwargs))
    # Use a context manager so the template's file handle is always closed.
    with Filesystem(cwd).open(ipynb, 'r') as fr:
        nbtemplate = nb_from_ipynb_io(fr)
    fields = render_nbtemplate_json_from_nbtemplate(env, nbtemplate)
    json.dump(fields, output)
def get_ipynb_hash():
    """Return the hash of the configured notebook, cached in the module-level `_ipynb_hash`.

    The hash (as computed by ``sha1sum_io``) is recomputed when the cache is
    empty or whenever the app runs with DEBUG enabled.
    """
    global _ipynb_hash
    if not _ipynb_hash or current_app.config['DEBUG']:
        fs = Filesystem(current_app.config['CWD'])
        # Close the notebook handle once hashing is done instead of leaking it.
        with fs.open(current_app.config['IPYNB'], 'rb') as fr:
            _ipynb_hash = sha1sum_io(fr)
    return _ipynb_hash
def data_files(path):
    """Serve a result file, or the landing page for a result directory.

    A `path` ending in ``/`` addresses a result directory: HTML clients get the
    rendered landing page, other clients get that directory's notebook file.
    Any other `path` is looked up directly under ``<DATA_DIR>/output``.
    Missing files produce a 404.
    """
    if path.endswith('/'):
        mimetype = request.accept_mimetypes.best_match([
            'text/html',
            'application/json',
            'application/vnd.jupyter',
        ], 'text/html')
        if mimetype == 'text/html':
            env = get_jinja2_env(config=current_app.config)
            return env.get_template('landing.j2').render(
                _nb=os.path.basename(current_app.config['IPYNB']),
            )
        # Non-HTML clients asking for the directory get its notebook.
        path += current_app.config['IPYNB']
    data_fs = Filesystem(Filesystem.join(current_app.config['DATA_DIR'], 'output'))
    if data_fs.exists(path):
        return send_file(data_fs.open(path, 'rb'), attachment_filename=os.path.basename(path))
    abort(404)
def prepare_results(data):
    """Materialize the output directory for a submission, unless it already exists.

    The directory is ``output/<hash of notebook hash + data>`` inside the data
    filesystem. When the notebook is not there yet, uploaded-file fields in
    `data` are rewritten in place to bare filenames, the notebook is rendered
    from the template, the referenced inputs are hard-linked next to it and
    the rendered notebook is written out.
    """
    results_hash = sha1sum_dict(dict(ipynb=get_ipynb_hash(), data=data))
    data_fs = Filesystem(current_app.config['DATA_DIR'])
    results_path = Filesystem.join('output', results_hash)
    existing_notebook = Filesystem.join(results_path, current_app.config['IPYNB'])
    if data_fs.exists(existing_notebook):
        return
    # prepare files to be linked and update field to use filename
    file_fields = {
        spec['args']['name']
        for spec in get_fields()
        if spec['field'] == 'FileField'
    }
    links = []
    files = {}
    for file_field in file_fields:
        fdata = data.get(file_field)
        if not fdata:
            continue
        raw_hash, raw_name = fdata.split('/', maxsplit=1)
        content_hash = sanitize_sha1sum(raw_hash)
        filename = secure_filepath(raw_name)
        link_source = Filesystem.join('input', content_hash)
        link_target = Filesystem.join(results_path, filename)
        links.append((link_source, link_target))
        files[filename] = filename
        data[file_field] = filename
    # construct notebook
    env = get_jinja2_env(config=current_app.config, context=data, session=results_hash)
    template_fs = Filesystem(current_app.config['CWD'])
    with template_fs.open(current_app.config['IPYNB'], 'r') as stream:
        nbtemplate = nb_from_ipynb_io(stream)
    # in case of constraint failures, we'll fail here
    nb = render_nb_from_nbtemplate(env, nbtemplate, files=files)
    # actually link all input files into output directory
    for link_source, link_target in links:
        data_fs.link(link_source, link_target)
    # write notebook
    nbfile = Filesystem.join(results_path, os.path.basename(current_app.config['IPYNB']))
    with data_fs.open(nbfile, 'w') as out:
        nb_to_ipynb_io(nb, out)
def favicon():
    """Serve ``favicon.ico`` from the configured static directory, or 404 if absent."""
    static_fs = Filesystem(current_app.config['STATIC_DIR'])
    if not static_fs.exists('favicon.ico'):
        abort(404)
    icon = static_fs.open('favicon.ico', 'rb')
    return send_file(icon, attachment_filename='favicon.ico')
def ls(self, path=''):
    """List entries below `path` (default: the filesystem's prefix).

    Results are the matches of ``<root>/*`` followed by those of
    ``<root>/**/*``, each with the root and the following separator stripped.
    """
    root = FS.join(self._prefix, path) if path else self._prefix
    strip = len(root) + 1  # root plus the separator that follows it
    matches = self._fs.glob(root + '/*') + self._fs.glob(root + '/**/*')
    return [match[strip:] for match in matches]
def path(self, path=''):
    """Return `path` joined onto this filesystem's prefix (the prefix joined with '' by default)."""
    resolved = FS.join(self._prefix, path)
    return resolved