def test_notebook_dependency(fxsoldir, fxnewfile):
    '''Check that a changed dependency file changes the notebook's HDF output.

    The notebook is run once, a line is written to ``fxnewfile``, the
    manifest is expected to report changed dependency files, and a second
    run must yield a different checksum for the stored 'test' frame.

    ``fxsoldir`` is not referenced in the body (presumably a pytest fixture
    needed for its setup); ``fxnewfile`` is used as a writable file path.
    '''
    def report_checksum(template):
        # Checksum the stored 'test' frame, echoing it and the frame length.
        with HDF('haje01') as hdf:
            chksum = dataframe_checksum(hdf.store['test'])
            print(template.format(chksum))
            print(len(hdf.store['test']))
        return chksum

    path = os.path.join(get_notebook_dir(), 'test-notebook3.ipynb')
    assert os.path.isfile(path)

    # run notebook first, after dropping any previously stored 'test' frame
    with HDF('haje01') as hdf:
        if 'test' in hdf.store:
            del hdf.store['test']
    update_notebook_by_run(path)
    manifest = Manifest(True, path)
    assert manifest._prev_files_chksum == manifest._dep_files_chksum
    prev_hdf_chksum = report_checksum("prev_hdf_chksum {}")

    # add new file
    with open(fxnewfile, 'w') as f:
        f.write('2014-03-05 23:30 [ERROR] - Async\n')
    manifest = Manifest(False, path)
    assert manifest._depend_files_changed
    assert manifest._prev_files_chksum != manifest._dep_files_chksum

    # run notebook again
    update_notebook_by_run(path)
    new_hdf_chksum = report_checksum("new_hdf_chksum {}")

    # the output must differ between the two runs
    assert prev_hdf_chksum != new_hdf_chksum
def update_notebooks():
    '''Check notebook's dependency and run for dashboard if needed.

    Delegates to ``update_all_notebooks`` while skipping
    'test-notebook6.ipynb' in the notebook directory.
    '''
    logging.debug('update_notebooks start')
    excluded = [os.path.join(get_notebook_dir(), 'test-notebook6.ipynb')]
    update_all_notebooks(excluded)
    logging.debug('update_notebooks done')
def start_view(nbpath):
    '''Queue a view-cell run for a notebook using the posted form fields.

    The request body is parsed as JSON and iterated as a sequence of
    mappings, each carrying a 'name' and a 'value' key. The entry named
    'wzd_formname' selects the form; every other entry becomes a keyword
    argument. A name that occurs more than once is collected into a list.

    Args:
        nbpath: notebook path, joined onto the notebook directory.

    Returns:
        A ``Response`` whose body is the id of the queued task.
    '''
    logging.debug('start_view')
    data = json.loads(request.data)
    kwargs = {}
    nbdir = get_notebook_dir()
    nbpath = os.path.join(nbdir, nbpath)
    formname = ''
    for kv in data:
        name = kv['name']
        # Lazy %-style arguments: json.loads yields unicode text, and
        # formatting it into a byte-string template with str.format raises
        # UnicodeEncodeError for non-ASCII names under Python 2.
        logging.debug('name: %s', name)
        if name == 'wzd_formname':
            logging.debug('wzd_formname')
            formname = kv['value']
            logging.debug('formname: %s', formname)
            continue

        value = kv['value']
        logging.debug(u'value: %s', value)
        if name not in kwargs:
            kwargs[name] = value
            continue
        # Repeated field: promote the first value to a list, then append.
        if not isinstance(kwargs[name], list):
            kwargs[name] = [kwargs[name]]
        kwargs[name].append(value)

    from wzdat.dashboard.tasks import run_view_cell
    task = run_view_cell.delay(nbpath, formname, kwargs)
    return Response(task.task_id)
def test_notebook_nodata():
    '''Running the no-data notebook must leave 'NoDataFound' in its HTML.'''
    nb_path = os.path.join(get_notebook_dir(), 'test-notebook-nodata.ipynb')
    assert os.path.isfile(nb_path)
    update_notebook_by_run(nb_path)
    html = notebook_outputs_to_html(nb_path)
    assert 'NoDataFound' in html
def dashboard():
    '''Render the dashboard page with dashboard notebooks bucketed by group.

    Notebooks whose 'dashboard' manifest entry is a dict are filed under its
    'group' value (decoded from UTF-8); all others go into the '' group.
    Named groups are emitted in sorted order, followed by the '' group.
    '''
    logging.debug("dashboard home")
    projname, dev, cache_time = _page_common_vars()
    from wzdat.util import iter_dashboard_notebook

    base_url = 'http://%s:%d/tree' % (HOST, int(cfg["host_ipython_port"]))
    nbdir = get_notebook_dir()
    groups = defaultdict(list)
    for nbpath, mip in iter_dashboard_notebook(nbdir):
        logging.debug(u"dashboard notebook {}".format(nbpath))
        # Sub-directory relative to the notebook dir, leading separator cut.
        sdir = os.path.dirname(nbpath).replace(nbdir, '')[1:]
        fn = os.path.basename(nbpath)
        entry = (nbpath, os.path.join(base_url, sdir, fn),
                 os.path.splitext(fn)[0])
        info = mip['dashboard']
        group_key = (info['group'].decode('utf8')
                     if isinstance(info, dict) else '')
        groups[group_key].append(entry)
    logging.debug("collected notebooks by group")

    gnbs = []
    for gk in sorted(groups.keys()):
        if gk == '':
            continue
        _collect_gnbs(gnbs, gk, groups)
    # Ungrouped notebooks always come last.
    if '' in groups:
        _collect_gnbs(gnbs, '', groups)
    logging.debug("done _collect_gnbs")
    return render_template("dashboard.html", cur="dashboard",
                           projname=projname, notebooks=gnbs,
                           nb_url=base_url, dev=dev, cache_time=cache_time)
def test_notebook_run():
    '''A run must bump the notebook's mtime and register it in run info.'''
    nb_path = os.path.join(get_notebook_dir(), 'test-notebook.ipynb')
    assert os.path.isfile(nb_path)

    mtime_before = os.stat(nb_path).st_mtime
    update_notebook_by_run(nb_path)
    mtime_after = os.stat(nb_path).st_mtime
    assert mtime_after > mtime_before

    # First element of every run-info record is compared against the path.
    assert nb_path in [info[0] for info in iter_run_info()]
def update_all_notebooks(skip_nbs=None):
    '''Resolve the notebook dependency tree and return the resolve result.

    Args:
        skip_nbs: passed through to ``DependencyTree`` unchanged.

    Returns:
        Whatever ``DependencyTree.resolve(True)`` returns.
    '''
    logging.debug('update_all_notebooks start')
    nbdir = get_notebook_dir()
    from wzdat.nbdependresolv import DependencyTree
    result = DependencyTree(nbdir, skip_nbs).resolve(True)
    logging.debug('update_all_notebooks done')
    return result
def test_notebook_util():
    '''Check notebook iteration counts and the HDF-to-notebook lookup.'''
    nbdir = get_notebook_dir()

    all_nbs = [found for found in iter_notebooks(nbdir)]
    assert len(all_nbs) == 13

    with_manifest = [(found, minput) for found, minput
                     in iter_notebook_manifest_input(nbdir)]
    assert len(with_manifest) == 11

    expected = os.path.join(nbdir, 'test-notebook3.ipynb')
    assert expected == find_hdf_notebook_path('haje01', 'test')
def start_rerun(nbrpath):
    '''Queue a rerun of a notebook and answer with '<nbrpath>/<task id>'.

    Args:
        nbrpath: notebook path, joined onto the notebook directory to build
            the path handed to the task.

    Returns:
        A ``Response`` whose body is ``nbrpath``, a slash and the task id.
    '''
    logging.debug('start_rerun')
    abs_path = os.path.join(get_notebook_dir(), nbrpath)
    from wzdat.dashboard.tasks import rerun_notebook
    queued = rerun_notebook.delay(abs_path)
    task_info = nbrpath + '/' + queued.task_id
    logging.debug(u'rv {}'.format(task_info))
    return Response(task_info)
def test_notebook_manifest_error():
    '''A broken manifest must raise SyntaxError and record its traceback.

    After the error, the first cell of the manifest file is expected to
    carry an output whose traceback mentions 'invalid syntax'.
    '''
    nbapath = os.path.join(get_notebook_dir(),
                           'test-notebook-manifest-error.ipynb')
    raised = False
    try:
        Manifest(False, nbapath)
    except SyntaxError:
        raised = True
        mpath = get_notebook_manifest_path(nbapath)
        with open(mpath, 'r') as f:
            data = json.loads(f.read())
        first_cell = data['cells'][0]
        assert 'invalid syntax' in first_cell['outputs'][0]['traceback'][0]
    # Constructing the manifest without an error is a failure.
    assert raised
def test_common_manifest():
    '''Put a frame through one manifest and select it through another.'''
    import pandas as pd
    nbdir = get_notebook_dir()

    # Writing through the output HDF must set the output checksum.
    manifest = Manifest(False, os.path.join(nbdir, 'test-notebook3.ipynb'))
    manifest.output.hdf.put(pd.DataFrame([1, 2, 3]))
    assert manifest._out_hdf_chksum is not None

    # The second notebook's dependency HDF is queried with 'index>1'.
    manifest = Manifest(True, os.path.join(nbdir, 'test-notebook4.ipynb'))
    selected = manifest.depends.hdf.select('index>1')
    assert len(selected) == 1
def test_notebook_manifest1(fxsoldir):
    '''Run 'test-notebook3' and verify its manifest file and HDF output.

    Checks that the manifest file is rewritten by the run, that the stored
    frame has the expected length, that the manifest's second cell holds the
    expected lines, and that the output can be re-put and then selected
    through the dependency of 'test-notebook4'.

    ``fxsoldir`` is not referenced in the body (presumably a pytest fixture
    needed for its setup).
    '''
    nbdir = get_notebook_dir()
    path = os.path.join(nbdir, 'test-notebook3.ipynb')
    assert os.path.isfile(path)
    mpath = get_notebook_manifest_path(path)
    assert os.path.isfile(mpath)

    # check manifest being written
    mtime_before = os.stat(mpath).st_mtime
    update_notebook_by_run(path)
    assert os.stat(mpath).st_mtime > mtime_before

    # check hdf store
    from wzdat.util import HDF
    with HDF('haje01') as hdf:
        stored = hdf.store.select('test')
        assert len(stored) == 7560

    # check manifest checksum
    import json
    with open(mpath, 'r') as f:
        data = json.loads(f.read())
    cells = data['cells']
    assert len(cells) == 2
    chksums = cells[1]['source']
    # (source line index, text expected in that line); the last two rows
    # cover the output checksum.
    expected_lines = (
        (0, 'WARNING'),
        (2, 'last_run'),
        (3, 'elapsed'),
        (4, 'max_memory'),
        (5, 'error'),
        (6, 'depends'),
        (7, '8875249185536240278'),
        (9, 'output'),
        (10, '-2394538446589678049'),
    )
    for lineno, text in expected_lines:
        assert text in chksums[lineno]

    manifest = Manifest(False, path)
    assert type(manifest.last_run) is datetime
    assert manifest._out_hdf_chksum is None

    # rewrite manifest output by hdf put
    manifest.output.hdf.put(stored, data_columns=['level'])

    # select manifest output by hdf select
    manifest = Manifest(True, os.path.join(nbdir, 'test-notebook4.ipynb'))
    selected = manifest.depends.hdf.select("index>Timestamp('2014-03-01') &"
                                           "level='INFO'",
                                           columns=['level', 'node'])
    assert len(selected) == 1125
    assert len(selected.columns) == 2
def test_notebook_error():
    '''Verify error/changed flags and run-info agreement for a bad notebook.

    The notebook run may raise ValueError, which is tolerated. Afterwards
    the error flag must be set; touching the file must also set the changed
    flag. Run info read through ``rundb`` is then compared with the run info
    returned by ``get_run_info`` once the ``rundb`` entry has been removed.
    '''
    nb_path = os.path.join(get_notebook_dir(), 'test-notebook-error.ipynb')
    assert os.path.isfile(nb_path)
    try:
        update_notebook_by_run(nb_path)
    except ValueError:
        pass

    assert check_notebook_error_and_changed(nb_path) == (True, False)
    touch(nb_path)
    assert check_notebook_error_and_changed(nb_path) == (True, True)

    from wzdat import rundb
    redis_ri = rundb.get_run_info(nb_path)
    rundb.remove_run_info(nb_path)
    manifest_ri = get_run_info(nb_path)
    # index 1: elapsed time, index -1: error message; both must agree
    for idx in (1, -1):
        assert redis_ri[idx] == manifest_ri[idx]
def test_notebook_manifest2(fxsoldir, fxhdftest2):
    '''Multiple files & hdfs dependency test.

    'test-notebook5' must expose two file and two HDF dependencies with
    matching checksum counts and no output checksum; building the manifest
    of 'test-notebook6' must raise ``RecursiveReference``.

    Neither parameter is referenced in the body (presumably pytest fixtures
    needed for their setup).
    '''
    nbdir = get_notebook_dir()
    nb5_path = os.path.join(nbdir, 'test-notebook5.ipynb')
    assert os.path.isfile(nb5_path)
    nb5_manifest_path = get_notebook_manifest_path(nb5_path)
    assert os.path.isfile(nb5_manifest_path)

    update_notebook_by_run(nb5_path)
    manifest = Manifest(True, nb5_path)
    depends = manifest.depends
    assert len(depends.files) == 2
    assert len(depends.hdf) == 2
    assert len(manifest._dep_files_chksum) == 2
    assert len(manifest._dep_hdf_chksum) == 2
    assert manifest._out_hdf_chksum is None

    nb6_path = os.path.join(nbdir, 'test-notebook6.ipynb')
    # Result is unused, but the lookup is kept as in the original flow.
    get_notebook_manifest_path(nb6_path)
    with pytest.raises(RecursiveReference):
        Manifest(False, nb6_path)
def __init__(self, check_depends=True, explicit_nbpath=None):
    '''Locate the notebook's manifest file and initialise its checksums.

    Args:
        check_depends: forwarded to ``_init_checksum``.
        explicit_nbpath: notebook path to use as-is; when None the path is
            built from ``get_notebook_dir()`` and ``get_notebook_rpath()``.

    Raises:
        ManifestNotExist: when the manifest path is not an existing file.
    '''
    super(Manifest, self).__init__()
    if explicit_nbpath is not None:
        logging.debug(u"explicit_nbpath {}".format(explicit_nbpath))
        self._nbapath = explicit_nbpath
        self._path = get_notebook_manifest_path(explicit_nbpath)
    else:
        nbdir = get_notebook_dir()
        logging.debug(u"nbdir {}".format(nbdir))
        nbrpath = get_notebook_rpath()
        logging.debug(u"nbrpath {}".format(nbrpath))
        self._nbapath = os.path.join(nbdir, nbrpath)
        manifest_rpath = get_notebook_manifest_path(nbrpath)
        self._path = os.path.join(nbdir, manifest_rpath)

    logging.debug(u"find manifest {}".format(self._path))
    manifest_found = os.path.isfile(self._path.encode('utf8'))
    if not manifest_found:
        raise ManifestNotExist()
    self._init_checksum(check_depends)
    logging.debug("Manifest __init__ done")
def poll_rerun(task_info):
    '''Report the progress or outcome of a queued notebook rerun.

    Args:
        task_info: '<notebook relative path>/<task id>'; the text after the
            last slash is the task id, everything before it the path.

    Returns:
        'PROGRESS:0' while the task is pending or has no run info yet,
        'PROGRESS:<fraction>' while it is running without an error, or a
        ``Response`` carrying the recorded error, the task's non-None
        result, or the ``NoDataFound`` message. When the task result is
        None the function falls through and returns None.
    '''
    from wzdat import rundb
    from wzdat.dashboard.tasks import rerun_notebook
    # One partition at the last slash gives both pieces; without a slash the
    # path is '' and the whole string is the task id.
    nbrpath, _, task_id = task_info.rpartition('/')
    nbapath = os.path.join(get_notebook_dir(), nbrpath)
    try:
        task = rerun_notebook.AsyncResult(task_id)
        # Read the state once so both branch tests see the same value.
        state = task.state
        if state == 'PENDING':
            logging.debug('task pending')
            return 'PROGRESS:0'
        if state == 'PROGRESS':
            ri = rundb.get_run_info(nbapath)
            if ri is None:
                logging.debug(u"run info not exist")
                return 'PROGRESS:0'
            logging.debug(u"run info exist")
            err = ri[4]
            logging.debug(u'err: {}'.format(err))
            # The literal string 'None' marks "no error" in the run info.
            if err != 'None':
                logging.debug(u"ri error {}".format(err))
                return Response('<div class="view"><pre '
                                'class="ds-err">%s</pre></div>' % err)
            cur = int(ri[2])
            total = int(ri[3]) + 1
            logging.debug(u'cur {} total {}'.format(cur, total))
            return 'PROGRESS:' + str(cur / float(total))
        # Any other state: fetch the task result.
        nodata = task.get()
        if nodata is not None:
            return Response(nodata)
        # NOTE(review): a None result falls through and returns None, as
        # before; confirm the caller copes with that.
    except NoDataFound as e:
        logging.debug(unicode(e))
        return Response(u'<div class="view"><pre class="ds-err">{}</pre></div>'
                        .format(unicode(e)))
def _collect_gnbs(gnbs, gk, groups):
    '''Append ``(gk, notebooks)`` for one group to ``gnbs``.

    Each notebook becomes ``(url, fname, out, ri, path)``: ``out`` is the
    notebook's HTML output (replaced by a failure notice when the run info
    records an error), ``ri`` is ``(start, elapsed, cur, total, err)`` or
    None, and ``path`` has the notebook directory removed.

    Args:
        gnbs: list that receives the ``(gk, notebooks)`` tuple.
        gk: group key into ``groups``.
        groups: mapping from group key to ``(path, url, fname)`` tuples.
    '''
    nbdir = get_notebook_dir()
    logging.debug('_collect_gnbs ' + nbdir)
    collected = []
    for path, url, fname in groups[gk]:
        out = notebook_outputs_to_html(path)
        ri = get_run_info(path)
        logging.debug('get_run_info {}'.format(ri))
        if ri is not None:
            start, elapsed = _get_run_time(ri)
            cur, total = ri[2], ri[3]
            err = ri[4].decode('utf8')
            # The literal string 'None' marks "no error" in the run info.
            if err != 'None':
                out = ('<div class="fail-result">Check error, fix it, '
                       'and rerun.</div>')
            ri = (start, elapsed, cur, total, err)
        rel_path = path.replace(nbdir, '')[1:]
        collected.append((url, fname, out, ri, rel_path))
    gnbs.append((gk, collected))
    logging.debug('_collect_gnbs done')
def test_notebook_depresolv(fxsoldir):
    '''Check dependency edges, resolution and HDF checksum agreement.

    With 'test-notebook6' skipped: notebook4 depends on notebook3, notebook5
    depends on both, resolved notebooks share nothing with the scheduled
    ones, and each dependency HDF checksum equals its producer's output.

    ``fxsoldir`` is not referenced in the body (presumably a pytest fixture
    needed for its setup).
    '''
    nbdir = get_notebook_dir()
    from wzdat.nbdependresolv import DependencyTree
    dt = DependencyTree(nbdir, [os.path.join(nbdir, 'test-notebook6.ipynb')])
    nb3, nb4, nb5 = [dt.get_notebook_by_fname(fname) for fname in
                     ('test-notebook3', 'test-notebook4', 'test-notebook5')]
    assert nb4.is_depend(nb3)
    assert nb5.is_depend(nb3)
    assert nb5.is_depend(nb4)

    resolved, _ = dt.resolve(True)
    sched_nbs = {snb for snb, _scd in iter_scheduled_notebook(nbdir)}
    resolved_nbs = {nb.path for nb in resolved}
    assert len(sched_nbs & resolved_nbs) == 0

    # test hdf dependency
    nb5_deps = nb5.manifest.depends.hdf
    assert (nb3.manifest.output.hdf.checksum() ==
            nb4.manifest.depends.hdf.checksum())
    assert nb3.manifest.output.hdf.checksum() == nb5_deps[0].checksum()
    assert nb4.manifest.output.hdf.checksum() == nb5_deps[1].checksum()
def test_common_runinfo():
    '''Run info must still have five fields after its rundb entry is gone.'''
    nb_path = os.path.join(get_notebook_dir(), 'test-notebook.ipynb')
    rundb.remove_run_info(nb_path)
    info = get_run_info(nb_path)
    assert len(info) == 5
# Startup script: prepares plotting and pandas defaults, then tries to load
# the manifest of the current notebook into `manifest_`.
import os
import sys
# Make modules under /solution importable.
sys.path.append('/solution')
import matplotlib
import matplotlib as mlp
# Use the NanumGothic font at size 10 for matplotlib output.
mlp.rcParams['font.family'] = u'NanumGothic'
mlp.rcParams['font.size'] = 10
import pandas as pd
pd.set_option('io.hdf.default_format', 'table')  # default hdf format 'table'
from pandas import Series, DataFrame
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Apply the same font to seaborn's 'darkgrid' style.
sns.set_style('darkgrid', {'font.family': [u'NanumGothic']})
from wzdat.util import hdf_path, hdf_exists, get_notebook_rpath, get_notebook_dir
from wzdat.notebook_runner import NoDataFound
from wzdat.manifest import Manifest, ManifestNotExist
try:
    # Prefer a `__nbpath__` global when one is defined; otherwise ask
    # get_notebook_rpath for the relative path.
    nbrpath = __nbpath__ if '__nbpath__' in globals() else\
        get_notebook_rpath(False)
    nbapath = os.path.join(get_notebook_dir(), nbrpath)
    manifest_ = Manifest(True, nbapath)
except ManifestNotExist:
    # No manifest file for this notebook: leave `manifest_` unset.
    manifest_ = None
# NOTE(review): `os` is already imported at the top; this repeat is redundant.
import os
def test_notebook_dashboard(fxsoldir):
    '''Exactly five dashboard notebooks must be found in the notebook dir.

    ``fxsoldir`` is not referenced in the body (presumably a pytest fixture
    needed for its setup).
    '''
    found = list(iter_dashboard_notebook(get_notebook_dir()))
    assert len(found) == 5