# Imports assumed by these snippets: logging, os, and nbformat are standard or
# third-party packages; notebook, library, notedown, and resource are assumed
# to be d2l-book's own helper modules.
import logging
import os

import nbformat

from d2lbook import library, notebook, notedown, resource


def _process_and_eval_notebook(input_fn, output_fn, run_cells, config,
                               timeout=20 * 60, lang='python'):
    with open(input_fn, 'r') as f:
        md = f.read()
    nb = notebook.read_markdown(md)
    tab = config.tab
    if tab:
        # get the tab
        nb = notebook.split_markdown_cell(nb)
        nb = notebook.get_tab_notebook(nb, tab, config.default_tab)
        if not nb:
            logging.info(f"Skip evaluating tab {tab} for {input_fn}")
            # write an empty file to track the dependencies
            open(output_fn, 'w').close()
            return
        # replace alias
        if tab in config.library:
            nb = library.replace_alias(nb, config.library[tab])
    # evaluate
    if run_cells:
        # change to the notebook directory to resolve the relative paths properly
        cwd = os.getcwd()
        os.chdir(os.path.join(cwd, os.path.dirname(output_fn)))
        notedown.run(nb, timeout)
        os.chdir(cwd)
    # write
    nb['metadata'].update({'language_info': {'name': lang}})
    with open(output_fn, 'w') as f:
        f.write(nbformat.writes(nb))
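# For orientation, the final "write" step above can be reproduced in isolation
# with nothing but nbformat. This is a minimal, standalone sketch: the toy
# notebook contents and the output file name are assumptions for illustration,
# not part of d2l-book.
import nbformat
from nbformat.v4 import new_code_cell, new_notebook

toy_nb = new_notebook(cells=[new_code_cell("print('hello')")])
# record the kernel language so downstream tools render the notebook correctly
toy_nb['metadata'].update({'language_info': {'name': 'python'}})
with open('toy.ipynb', 'w') as f:
    f.write(nbformat.writes(toy_nb))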
def test_replace_alias(self):
    # Test https://github.com/d2l-ai/d2l-book/issues/14
    pairs = [
        # before, after
        ('X = d2l.reshape(d2l.arange(10,20),(2,3))',
         'X = torch.arange(10, 20).reshape((2, 3))'),
        ('d2l.numpy(a)', 'a.detach().numpy()'),
        ('d2l.transpose(a)', 'a.t()'),
        ('metric.add(l * d2l.size(y), d2l.size(y))',
         'metric.add(l * y.numel(), y.numel())'),
        ('float(d2l.reduce_sum(cmp.astype(y.dtype)))',
         'float(cmp.astype(y.dtype).sum())'),
        ('d2l.numpy(nn.LeakyReLU(alpha)(x))',
         'nn.LeakyReLU(alpha)(x).detach().numpy()'),
        ('d2l.reshape(X_tile(1 - d2l.eye(n_train)).astype(\'bool\'), (1,2))',
         'X_tile(1 - torch.eye(n_train)).astype(\'bool\').reshape((1, 2))'),
        ('float(d2l.reduce_sum(d2l.astype(cmp, y.dtype)))',
         'float(cmp.type(y.dtype).sum())'),
        ('\nenc_attention_weights = d2l.reshape(\n d2l.concat(net.encoder.attention_weights, 0),\n (num_layers, num_heads, -1, num_steps))\nenc_attention_weights.shape = 2\n',
         'enc_attention_weights = torch.cat(net.encoder.attention_weights, 0).reshape(\n (num_layers, num_heads, -1, num_steps))\nenc_attention_weights.shape = 2'),
        ('float(d2l.reduce_sum(d2l.abs(Y1 - Y2))) < 1e-6',
         'float(torch.abs(Y1 - Y2).sum()) < 1e-6'),
        ('d2l.plt.scatter(d2l.numpy(features[:, a + b]), d2l.numpy(labels), 1);',
         'd2l.plt.scatter(features[:, a + b].detach().numpy(),labels.detach().numpy(), 1);'),
        ('d2l.reshape(multistep_preds[i - tau: i], (1, -1))',
         'multistep_preds[i - tau:i].reshape((1, -1))'),
        ('X = d2l.reshape(d2l.arange(16, dtype=d2l.float32), (1, 1, 4, 4))',
         'X = torch.arange(16, dtype=torch.float32).reshape((1, 1, 4, 4))'),
        ('# comments\nX = d2l.reshape(a)', '# comments\nX = a.reshape()'),
        ('X = d2l.reshape(a) # comments', 'X = a.reshape() # comments'),
        ('Y[i, j] = d2l.reduce_sum((X[i: i + h, j: j + w] * K))',
         'Y[i, j] = (X[i:i + h, j:j + w] * K).sum()'),
        ('d2l.randn(size=(1,2)) * 0.01', 'np.random.randn(size=(1,2)) * 0.01'),
        ('d2l.randn(size=(1,2), device=d2l.try_gpu()) * 0.01',
         'np.random.randn(size=(1,2), ctx=d2l.try_gpu()) * 0.01'),
    ]
    for a, b in pairs:
        self.nb.cells[0].source = a
        nb = library.replace_alias(self.nb, self.tab_lib)
        compact = lambda x: x.replace('\n', '').replace(' ', '')
        self.assertEqual(compact(nb.cells[0].source), compact(b))
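# The pairs above all follow the same pattern: a framework-agnostic d2l.* call
# on the left is rewritten into the tab-specific idiom on the right. The toy
# rewriter below is a simplified sketch of that idea, not d2l-book's actual
# replace_alias implementation (which is driven by the alias tables in the
# project config); the three regex rules and the function name are assumptions.
import re

_TOY_REWRITES = [
    # pattern, replacement (applied with re.sub); handles only simple,
    # non-nested argument expressions
    (re.compile(r'd2l\.numpy\(([^()]+)\)'), r'\1.detach().numpy()'),
    (re.compile(r'd2l\.size\(([^()]+)\)'), r'\1.numel()'),
    (re.compile(r'd2l\.transpose\(([^()]+)\)'), r'\1.t()'),
]

def _toy_replace_alias(source):
    for pattern, repl in _TOY_REWRITES:
        source = pattern.sub(repl, source)
    return source

assert _toy_replace_alias('metric.add(l * d2l.size(y), d2l.size(y))') == \
    'metric.add(l * y.numel(), y.numel())'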
# A variant of _process_and_eval_notebook that additionally sanitizes the
# recorded cell outputs after evaluation.
def _process_and_eval_notebook(input_fn, output_fn, run_cells, config,
                               timeout=20 * 60, lang='python'):
    with open(input_fn, 'r') as f:
        md = f.read()
    nb = notebook.read_markdown(md)
    tab = config.tab
    if tab:
        # get the tab
        nb = notebook.split_markdown_cell(nb)
        nb = notebook.get_tab_notebook(nb, tab, config.default_tab)
        if not nb:
            logging.info(f"Skip evaluating tab {tab} for {input_fn}")
            # write an empty file to track the dependencies
            open(output_fn, 'w').close()
            return
        # replace alias
        if tab in config.library:
            nb = library.replace_alias(nb, config.library[tab])
    # evaluate
    if run_cells:
        # change to the notebook directory to resolve the relative paths properly
        cwd = os.getcwd()
        os.chdir(os.path.join(cwd, os.path.dirname(output_fn)))
        notedown.run(nb, timeout)
        os.chdir(cwd)
        # clean up the recorded outputs: drop tqdm HBox widgets and
        # change stderr output to stdout output
        for cell in nb.cells:
            if cell.cell_type == 'code' and 'outputs' in cell:
                outputs = []
                for out in cell['outputs']:
                    if ('data' in out and 'text/plain' in out['data'] and
                            out['data']['text/plain'].startswith('HBox')):
                        # a tqdm progress bar that cannot be displayed properly
                        continue
                    if 'name' in out and out['name'] == 'stderr':
                        out['name'] = 'stdout'
                    outputs.append(out)
                cell['outputs'] = outputs
    # write
    nb['metadata'].update({'language_info': {'name': lang}})
    with open(output_fn, 'w') as f:
        f.write(nbformat.writes(nb))
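# The output-sanitizing pass above can be exercised on its own with nbformat's
# v4 builders. This is a self-contained sketch: the toy cell, its fake stderr
# text, and the HBox repr are assumptions used purely for illustration.
from nbformat.v4 import new_code_cell, new_output

toy_cell = new_code_cell('pass', outputs=[
    new_output('stream', name='stderr', text='some warning\n'),
    new_output('display_data', data={'text/plain': 'HBox(children=...)'}),
])
sanitized = []
for out in toy_cell.outputs:
    if out.get('data', {}).get('text/plain', '').startswith('HBox'):
        continue  # drop tqdm widget reprs that cannot be rendered statically
    if out.get('name') == 'stderr':
        out['name'] = 'stdout'  # surface warnings as regular output
    sanitized.append(out)
toy_cell.outputs = sanitized
assert [out['name'] for out in toy_cell.outputs] == ['stdout']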
# A scheduler-based variant: tab extraction and alias replacement stay inline,
# while evaluation and writing are delegated to a _job helper queued on a
# GPU-aware scheduler.
def _process_and_eval_notebook(scheduler, input_fn, output_fn, run_cells,
                               config, timeout=20 * 60, lang='python'):
    with open(input_fn, 'r') as f:
        md = f.read()
    nb = notebook.read_markdown(md)
    tab = config.tab
    if tab:
        # get the tab
        nb = notebook.split_markdown_cell(nb)
        nb = notebook.get_tab_notebook(nb, tab, config.default_tab)
        if not nb:
            logging.info(f"Skip evaluating tab {tab} for {input_fn}")
            # write an empty file to track the dependencies
            open(output_fn, 'w').close()
            return
        # replace alias
        if tab in config.library:
            nb = library.replace_alias(nb, config.library[tab])
    nb = library.format_code_nb(nb)
    if not run_cells:
        logging.info(f'Converting {input_fn} to {output_fn}')
        _job(nb, output_fn, run_cells, timeout, lang)
    else:
        # use at most 2 gpus to evaluate a notebook
        num_gpus = resource.get_notebook_gpus(nb, 2)
        scheduler.add(1, num_gpus, target=_job,
                      args=(nb, output_fn, run_cells, timeout, lang),
                      description=f'Evaluating {input_fn}')
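# The variant above hands the actual evaluate-and-write work to a _job helper
# that is not shown in this section. The sketch below is a hypothetical
# reconstruction pieced together from the inline logic of the earlier versions
# (change into the notebook's directory, run the cells, then serialize); it is
# not d2l-book's actual _job.
def _job(nb, output_fn, run_cells, timeout, lang):
    if run_cells:
        # change to the notebook directory to resolve relative paths properly
        cwd = os.getcwd()
        os.chdir(os.path.join(cwd, os.path.dirname(output_fn)))
        try:
            notedown.run(nb, timeout)
        finally:
            os.chdir(cwd)
    nb['metadata'].update({'language_info': {'name': lang}})
    with open(output_fn, 'w') as f:
        f.write(nbformat.writes(nb))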