def _wrap_and_track_dependencies(self, node):
    # Wrap an h5py node in the appropriate ActivePapers wrapper
    # (DataGroup for groups, DatasetWrapper for datasets) and, if a
    # codelet is currently running, record the node as one of its
    # dependencies.
    ap_type = datatype(node)
    if ap_type == 'reference':
        # The node points into another paper; resolve the reference
        # and wrap the target with no codelet/data-item context.
        from activepapers.storage import dereference
        paper, node = dereference(node)
        if isinstance(node, h5py.Group):
            node = DataGroup(paper, None, node, None, None)
        else:
            node = DatasetWrapper(None, node, None)
    else:
        if self._codelet is not None:
            if ap_type is not None and ap_type != "group":
                # Record the enclosing data item if there is one,
                # otherwise the node itself.
                self._codelet.add_dependency(node.name
                                             if self._data_item is None
                                             else self._data_item.name)
            # If the node was generated by a calclet, that calclet is a
            # dependency as well.
            codelet = owner(node)
            if codelet is not None \
               and datatype(self._node[codelet]) == "calclet":
                self._codelet.add_dependency(codelet)
        if isinstance(node, h5py.Group):
            node = DataGroup(self._paper, self, node,
                             self._codelet, self._data_item)
        else:
            node = DatasetWrapper(self, node, self._codelet)
    return node
def _getitem(self, item):
    """Look up *item* under this node, resolving 'reference' items both
    before and after the lookup, and return the result as an APNode
    whose name is this node's name extended by *item*."""
    target = self._h5node
    if datatype(target) == 'reference':
        _, target = dereference(target)
    target = target[item]
    if datatype(target) == 'reference':
        _, target = dereference(target)
    prefix = self.name
    if not prefix.endswith('/'):
        prefix += '/'
    return APNode(target, prefix + item)
def walk(group):
    """Depth-first, top-down generator over every subgroup of *group*
    whose ActivePapers datatype is not 'data'."""
    for child in group.values():
        skip = not isinstance(child, h5py.Group) \
               or datatype(child) == 'data'
        if skip:
            continue
        yield child
        for descendant in walk(child):
            yield descendant
def ls(paper, long, type, pattern):
    """List the items of an ActivePaper on stdout.

    Items can be filtered by datatype (*type*) and by name patterns
    (*pattern*).  With *long*, each line shows the timestamp, the
    datatype, and a staleness marker ('*') before the name.
    """
    ap = activepapers.storage.ActivePaper(get_paper(paper), 'r')
    patterns = process_patterns(pattern)
    for item in ap.iter_items():
        name = item.name[1:]  # drop the initial slash
        dtype = datatype(item)
        if item.attrs.get('ACTIVE_PAPER_DUMMY_DATASET', False):
            dtype = 'dummy'
        if patterns and not any(p.match(name) for p in patterns):
            continue
        if type is not None and dtype != type:
            continue
        if long:
            ts = item.attrs.get('ACTIVE_PAPER_TIMESTAMP', None)
            if ts is None:
                sys.stdout.write(21 * " ")
            else:
                sys.stdout.write(time.strftime("%Y-%m-%d/%H:%M:%S ",
                                               time.localtime(ts / 1000.)))
            width = len("importlet ")  # the longest data type name
            sys.stdout.write((dtype + width * " ")[:width])
            sys.stdout.write('*' if ap.is_stale(item) else ' ')
        sys.stdout.write(name)
        sys.stdout.write('\n')
    ap.close()
def ls(paper, long, type, pattern):
    """Print a listing of an ActivePaper's items.

    *type* restricts the listing to one datatype; *pattern* to names
    matching any of the given patterns.  *long* adds timestamp, datatype
    and staleness columns.
    """
    ap = activepapers.storage.ActivePaper(get_paper(paper), 'r')
    patterns = process_patterns(pattern)
    for item in ap.iter_items():
        name = item.name[1:]  # remove initial slash
        dtype = datatype(item)
        if item.attrs.get('ACTIVE_PAPER_DUMMY_DATASET', False):
            dtype = 'dummy'
        if patterns and not any(p.match(name) for p in patterns):
            continue
        if type is not None and dtype != type:
            continue
        pieces = []
        if long:
            ts = item.attrs.get('ACTIVE_PAPER_TIMESTAMP', None)
            if ts is None:
                pieces.append(21 * " ")
            else:
                pieces.append(time.strftime("%Y-%m-%d/%H:%M:%S ",
                                            time.localtime(ts / 1000.)))
            width = len("importlet ")  # the longest data type name
            pieces.append((dtype + width * " ")[:width])
            pieces.append('*' if ap.is_stale(item) else ' ')
        pieces.append(name)
        pieces.append('\n')
        sys.stdout.write(''.join(pieces))
    ap.close()
def __init__(self, paper, node):
    """Cache the metadata of a code item: its datatype, its language,
    its name, and the source text itself (decoded as UTF-8)."""
    # References to the paper and the underlying HDF5 node.
    self._paper = paper
    self._node = node
    # Publicly visible metadata.
    self.name = node.name
    self.type = datatype(node)
    self.language = language(node)
    # The source is stored as a scalar dataset; unwrap and decode it.
    self.code = utf8(node[...].flat[0])
def checkout(paper, type, pattern, dry_run):
    """Extract items of an ActivePaper into files under the current
    working directory, optionally filtered by datatype and by name
    patterns.  Items whose datatype cannot be extracted are skipped
    with a message on stderr."""
    ap = activepapers.storage.ActivePaper(get_paper(paper), 'r')
    patterns = process_patterns(pattern)
    for item in ap.iter_items():
        name = item.name[1:]  # drop the initial slash
        dtype = datatype(item)
        if patterns and not any(p.match(name) for p in patterns):
            continue
        if type is not None and dtype != type:
            continue
        try:
            extract_to_file(ap, item, directory=os.getcwd())
        except ValueError:
            sys.stderr.write("Skipping %s: data type %s not extractable\n"
                             % (item.name, datatype(item)))
    ap.close()
def owned(group):
    """Return the names of all nodes in *group* (searched recursively,
    skipping 'data' groups) whose owner is the given codelet.

    `codelet` is a free variable captured from the enclosing scope.
    """
    collected = []
    for child in group.values():
        if owner(child) == codelet:
            collected.append(child.name)
            continue
        if isinstance(child, h5py.Group) and datatype(child) != 'data':
            collected.extend(owned(child))
    return collected
def extract_to_file(paper, item, file=None, filename=None, directory=None):
    """Extract an item's contents to a file.

    Either a writable binary *file* object is given (then *filename*
    and *directory* must be None), or a target path is derived from
    *filename* and/or *directory* (with a heuristic file extension for
    extension-less item names).

    Returns the path written to, or None when a file object was given.

    Raises ValueError if the item's datatype is not extractable, or if
    *filename* lies outside *directory*.
    """
    if file is None:
        if filename is not None:
            filename = os.path.abspath(filename)
        if directory is not None:
            directory = os.path.abspath(directory)
        if filename is not None and directory is not None:
            if not filename.startswith(directory):
                # BUG FIX: the message used "%" instead of "%s", which
                # made the %-formatting itself raise ("unsupported
                # format character") instead of this ValueError.
                raise ValueError("%s not in directory %s"
                                 % (filename, directory))
        if filename is None:
            item_name = item.name.split('/')[1:]
            filename = os.path.join(directory, *item_name)
            if '.' not in item_name[-1]:
                # Add a file extension using some heuristics
                language = item.attrs.get('ACTIVE_PAPER_LANGUAGE', None)
                filename += file_extensions.get((datatype(item), language),
                                                '')
        directory, _ = os.path.split(filename)
        if directory and not os.path.exists(directory):
            os.makedirs(directory)
        file = open(filename, 'wb')
        close = True
    else:
        # If a file object is given, no other file specification is allowed
        assert filename is None
        assert directory is None
        close = False
    dt = datatype(item)
    if dt in ['file', 'text']:
        internal = activepapers.storage.InternalFile(item, 'rb')
        file.write(internal.read())
    elif dt in extractable_types:
        file.write(item[...].flat[0])
    else:
        raise ValueError("cannot extract dataset %s of type %s"
                         % (item.name, dt))
    if close:
        file.close()
        # Propagate the item's modification time to the file on disk.
        mtime = mod_time(item)
        if mtime:
            os.utime(filename, (mtime, mtime))
    return filename
def __init__(self, paper, parent, h5group, codelet, data_item=None):
    # Wrap an h5py group for dependency-tracked access.
    #
    # paper:     the owning ActivePaper
    # parent:    the parent group wrapper; None means this is the root,
    #            which then acts as its own parent
    # h5group:   the underlying h5py.Group
    # codelet:   the codelet currently running, or None
    # data_item: the nearest enclosing node of datatype "data", if any
    self._paper = paper
    self._parent = parent if parent is not None else self
    self._node = h5group
    self._codelet = codelet
    self._data_item = data_item
    if self._data_item is None and datatype(h5group) == "data":
        # This group is itself a data item; children will inherit it.
        self._data_item = self
    self.attrs = AttrWrapper(h5group)
    self.ref = h5group.ref
    self.name = h5group.name
def process(node, refs):
    # Recursively collect references to other papers under *node*.
    # *refs* maps a paper reference to a pair of sets:
    #   refs[paper_ref][0] : paths reached through 'reference' items
    #   refs[paper_ref][1] : paths recorded as copy sources
    if datatype(node) == 'reference':
        paper_ref, ref_path = node[()]
        refs[paper_ref][0].add(ref_path)
    elif 'ACTIVE_PAPER_COPIED_FROM' in node.attrs:
        source = node.attrs['ACTIVE_PAPER_COPIED_FROM']
        paper_ref, ref_path = source
        # .flat[0] unwraps array-valued attribute elements.
        # NOTE(review): newer h5py versions may return plain scalars
        # here, on which .flat does not exist — verify against the
        # h5py version in use.
        refs[paper_ref.flat[0]][1].add(ref_path.flat[0])
    if isinstance(node, h5py.Group):
        # Recurse into all children of a group.
        for item in node:
            process(node[item], refs)
    return refs
def replace_by_dummy(self, item_name):
    """Replace the dataset *item_name* by an empty scalar dummy dataset
    that preserves the original's provenance metadata (generating
    codelet, dependencies, timestamp) and is flagged as a dummy."""
    item = self.file[item_name]
    codelet = owner(item)
    assert codelet is not None
    dtype = datatype(item)
    mtime = mod_time(item)
    # NOTE(review): assumes the attribute is present; list(None) below
    # would raise TypeError otherwise — confirm against callers.
    deps = item.attrs.get('ACTIVE_PAPER_DEPENDENCIES')
    del self.file[item_name]
    # BUG FIX: np.int was deprecated in NumPy 1.20 and removed in 1.24;
    # it was a plain alias for the builtin int, used here.
    ds = self.file.create_dataset(item_name,
                                  data=np.zeros((), dtype=int))
    stamp(ds, dtype,
          dict(ACTIVE_PAPER_GENERATING_CODELET=codelet,
               ACTIVE_PAPER_DEPENDENCIES=list(deps)))
    timestamp(ds, mtime)
    ds.attrs['ACTIVE_PAPER_DUMMY_DATASET'] = True
def track_and_check_import(self, module_name):
    # Verify that *module_name* may be imported by the running codelet,
    # and record modules stored in the paper as dependencies.
    if module_name == 'activepapers.contents':
        # The virtual contents module is always allowed.
        return
    node = self.paper.get_local_module(module_name)
    if node is None:
        # Not stored in the paper: allow only declared paper
        # dependencies, whitelisted standard-library modules,
        # and numpy/h5py.
        top_level = module_name.split('.')[0]
        if top_level not in self.paper.dependencies \
           and top_level not in activepapers.standardlib.allowed_modules \
           and top_level not in ['numpy', 'h5py']:
            raise ImportError("import of %s not allowed" % module_name)
    else:
        if datatype(node) != "module":
            # A package: its module code is the __init__ item.
            node = node.get("__init__", None)
        if node is not None and node.in_paper(self.paper):
            self.add_dependency(node.name)
def run_codelet(self, path, debug=False):
    """Run the codelet stored at *path* (relative to /code, or an
    absolute /code/... path).

    Returns None on success.  On failure, returns a traceback string
    trimmed to the codelet's own frames, or drops into pdb when
    *debug* is true.
    """
    if path.startswith('/'):
        assert path.startswith('/code/')
        path = path[6:]
    node = APNode(self.code_group)[path]
    class_ = {'calclet': Calclet,
              'importlet': Importlet}[datatype(node)]
    try:
        class_(self, node).run()
        return None
    except Exception:
        # TODO: preprocess traceback to show only the stack frames
        # in the codelet.
        import traceback
        type, value, trace = sys.exc_info()
        stack = traceback.extract_tb(trace)
        del trace
        # Strip the machinery frames up to and including 'execstring',
        # where the codelet's own code begins.
        # BUG FIX: the original "while True" raised IndexError when no
        # 'execstring' frame was present; guard on the stack instead.
        while stack:
            if stack[0][2] == 'execstring':
                del stack[0]
                break
            del stack[0]
        fstack = []
        for filename, lineno, fn_name, code in stack:
            if ':' in filename:
                # In-paper frames carry "paper_id:codelet_path"
                # filenames; rewrite them to something readable.
                paper_id, codelet = filename.split(':')
                paper = paper_registry.get(paper_id)
                if paper is None:
                    paper_name = '<ActivePaper>'
                else:
                    paper_name = '<%s>' % paper.file.filename
                filename = ':'.join([paper_name, codelet])
                if code is None and paper is not None:
                    # Recover the source line from the stored script.
                    # NOTE(review): the stored scalar may be bytes; a
                    # sibling variant wraps it in utf8() — confirm.
                    script = paper.file[codelet][...].flat[0]
                    code = script.split('\n')[lineno-1]
            fstack.append((filename, lineno, fn_name, code))
        tb_text = ''.join(["Traceback (most recent call last):\n"] +
                          traceback.format_list(fstack) +
                          traceback.format_exception_only(type, value))
        if debug:
            sys.stderr.write(tb_text)
            import pdb
            pdb.post_mortem()
        else:
            return tb_text
def run_codelet(self, path, debug=False):
    # Run the codelet stored at *path* (relative to /code, or an
    # absolute /code/... path).  Returns None on success; on failure,
    # returns a traceback string trimmed to the codelet's own frames,
    # or drops into pdb when debug is true.
    if path.startswith('/'):
        assert path.startswith('/code/')
        path = path[6:]
    node = APNode(self.code_group)[path]
    class_ = {'calclet': Calclet,
              'importlet': Importlet}[datatype(node)]
    try:
        class_(self, node).run()
        return None
    except Exception:
        # TODO: preprocess traceback to show only the stack frames
        # in the codelet.
        import traceback
        type, value, trace = sys.exc_info()
        stack = traceback.extract_tb(trace)
        del trace
        # Strip the machinery frames up to and including 'execcode',
        # where the codelet's own code begins.
        while stack:
            if stack[0][2] == 'execcode':
                del stack[0]
                break
            del stack[0]
        fstack = []
        for filename, lineno, fn_name, code in stack:
            if ':' in filename:
                # In-paper frames carry "paper_id:codelet_path"
                # filenames; rewrite them to something readable.
                paper_id, codelet = filename.split(':')
                paper = paper_registry.get(paper_id)
                if paper is None:
                    paper_name = '<ActivePaper>'
                else:
                    paper_name = '<%s>' % paper.file.filename
                filename = ':'.join([paper_name, codelet])
                if code is None and paper is not None:
                    # Recover the missing source line from the stored
                    # script dataset.
                    script = utf8(paper.file[codelet][...].flat[0])
                    code = script.split('\n')[lineno - 1]
            fstack.append((filename, lineno, fn_name, code))
        tb_text = ''.join(["Traceback (most recent call last):\n"] + \
                          traceback.format_list(fstack) + \
                          traceback.format_exception_only(type, value))
        if debug:
            sys.stderr.write(tb_text)
            import pdb
            pdb.post_mortem()
        else:
            return tb_text
def process(node, refs):
    # Recursively collect references to other papers under *node*.
    # *refs* maps a paper reference to a pair of sets:
    #   refs[paper_ref][0] : paths reached through 'reference' items
    #   refs[paper_ref][1] : paths recorded as copy sources
    if datatype(node) == 'reference':
        paper_ref, ref_path = node[()]
        refs[paper_ref][0].add(ref_path)
    elif 'ACTIVE_PAPER_COPIED_FROM' in node.attrs:
        source = node.attrs['ACTIVE_PAPER_COPIED_FROM']
        paper_ref, ref_path = source
        if h5py.version.version_tuple[:2] <= (2, 2):
            # h5py 2.2 returns a wrong dtype
            paper_ref = paper_ref.flat[0]
            ref_path = ref_path.flat[0]
        refs[paper_ref][1].add(ref_path)
    if isinstance(node, h5py.Group):
        # Recurse into all children of a group.
        for item in node:
            process(node[item], refs)
    return refs
def find_module(self, fullname, path=None):
    """PEP 302 finder hook: return a ModuleLoader for *fullname* when
    the current paper stores it as a Python module (or a package with a
    Python __init__), otherwise None."""
    codelet, paper = get_codelet_and_paper()
    if paper is None:
        return None
    node = paper.get_local_module(fullname)
    if node is None:
        # No corresponding node found
        return None
    is_package = node.is_group()
    if is_package:
        # A package's module code is its __init__ item.
        if '__init__' not in node:
            # Not a package
            return None
        node = node['__init__']
    not_a_module = datatype(node) != "module"
    not_python = node.attrs.get("ACTIVE_PAPER_LANGUAGE", None) != "python"
    if not_a_module or not_python:
        # Node found but is not a Python module
        return None
    return ModuleLoader(paper, fullname, node, is_package)
def dereference(ref_node):
    """Resolve a 'reference' node.

    Returns the pair (referenced paper, node inside that paper).
    """
    assert datatype(ref_node) == 'reference'
    paper_ref, path = ref_node[()]
    referenced = open_paper_ref(ascii(paper_ref))
    return referenced, referenced.file[path]
def update_from_file(paper, filename, type=None, force_update=False,
                     dry_run=False, dataset_name=None, create_new=True):
    """Prepare updating (or creating) a paper item from a file on disk.

    paper:         the open ActivePaper to modify
    filename:      path of the source file
    type:          ActivePapers datatype; required when creating a new
                   item, must match the existing item when updating
    force_update:  update even if the file is not newer than the item
    dry_run:       only print what would be done
    dataset_name:  explicit item name; otherwise derived from filename
    create_new:    if False, silently skip files without a matching item

    Raises ValueError for a missing file, a missing/mismatched datatype,
    or an item path outside the section its datatype requires.
    """
    if not os.path.exists(filename):
        raise ValueError("File %s not found" % filename)
    mtime = os.path.getmtime(filename)
    basename = filename
    ext = ''
    if dataset_name is not None:
        item = paper.file.get(dataset_name, None)
        if item is not None:
            basename = item.name
    else:
        item = paper.file.get(basename, None)
        if item is None:
            # Retry without the file extension; the extension is kept
            # for the language heuristics below.
            basename, ext = os.path.splitext(filename)
            item = paper.file.get(basename, None)
    language = file_languages.get(ext, None)
    if item is None:
        if not create_new:
            return
        # Create new item
        if type is None:
            raise ValueError("Datatype required to create new item %s"
                             % basename)
        if type in ['calclet', 'importlet', 'module']:
            if not basename.startswith('code/'):
                raise ValueError("Items of type %s must be"
                                 " in the code section" % type)
            if language != 'python':
                raise ValueError("Items of type %s must be Python code"
                                 % type)
            if type == 'module' and \
               not basename.startswith('code/python-packages/'):
                # BUG FIX: the two string fragments were concatenated
                # without a space, yielding "...must be incode/...".
                raise ValueError("Items of type %s must be in "
                                 "code/python-packages" % type)
        elif type == 'file':
            if not basename.startswith('data/') \
               and not basename.startswith('documentation/'):
                raise ValueError("Items of type %s must be"
                                 " in the data or documentation section"
                                 % type)
            basename += ext
        elif type == 'text':
            if not basename.startswith('documentation/'):
                raise ValueError("Items of type %s must be"
                                 " in the documentation section" % type)
    else:
        # Update existing item
        if mtime <= mod_time(item) and not force_update:
            if dry_run:
                sys.stdout.write("Skip %s: file %s is not newer\n"
                                 % (item.name, filename))
            return
        if type is not None and type != datatype(item):
            raise ValueError("Cannot change datatype %s to %s"
                             % (datatype(item), type))
        if type is None:
            type = datatype(item)
        if language is None:
            language = item.attrs.get('ACTIVE_PAPER_LANGUAGE', None)
        if dry_run:
            sys.stdout.write("Delete %s\n" % item.name)
        else:
            # Delete the old item so it can be recreated from the file.
            del item.parent[item.name.split('/')[-1]]
if dry_run:
    # Report what would be created, including the language when known.
    fulltype = type if language is None else '/'.join((type, language))
    sys.stdout.write("Create item %s of type %s from file %s\n"
                     % (basename, fulltype, filename))
else:
    if type in ['calclet', 'importlet', 'module']:
        # IMPROVEMENT: read through a context manager so the source
        # file is closed deterministically (the original leaked the
        # handle until garbage collection).
        with open(filename, 'rb') as source:
            code = source.read().decode('utf-8')
        # basename[5:] strips the leading "code/" section prefix.
        item = paper.store_python_code(basename[5:], code)
        stamp(item, type, {})
        timestamp(item, mtime)
    elif type in ['file', 'text']:
        f = paper.open_internal_file(basename, 'w')
        with open(filename, 'rb') as source:
            f.write(source.read())
        f.close()
        stamp(f._ds, type, {'ACTIVE_PAPER_LANGUAGE': language})
        timestamp(f._ds, mtime)
def update_from_file(paper, filename, type=None, force_update=False,
                     dry_run=False, dataset_name=None, create_new=True):
    # Prepare updating (or creating) a paper item from a file on disk.
    #
    # paper:         the open ActivePaper to modify
    # filename:      path of the source file
    # type:          ActivePapers datatype; required when creating a new
    #                item, must match the existing item when updating
    # force_update:  update even if the file is not newer than the item
    # dry_run:       only print what would be done
    # dataset_name:  explicit item name; otherwise derived from filename
    # create_new:    if False, silently skip files with no matching item
    if not os.path.exists(filename):
        raise ValueError("File %s not found" % filename)
    mtime = os.path.getmtime(filename)
    basename = filename
    ext = ''
    if dataset_name is not None:
        item = paper.file.get(dataset_name, None)
        if item is not None:
            basename = item.name
    else:
        item = paper.file.get(basename, None)
        if item is None:
            # Retry without the file extension; the extension is kept
            # for the language heuristics below.
            basename, ext = os.path.splitext(filename)
            item = paper.file.get(basename, None)
    language = file_languages.get(ext, None)
    if item is None:
        if not create_new:
            return
        # Create new item
        if type is None:
            raise ValueError("Datatype required to create new item %s"
                             % basename)
        if type in ['calclet', 'importlet', 'module']:
            if not basename.startswith('code/'):
                raise ValueError("Items of type %s must be"
                                 " in the code section" % type)
            if language != 'python':
                raise ValueError("Items of type %s must be Python code"
                                 % type)
            if type == 'module' and \
               not basename.startswith('code/python-packages/'):
                # NOTE(review): these fragments concatenate without a
                # space, so the message reads "...must be incode/...".
                raise ValueError("Items of type %s must be in"
                                 "code/python-packages" % type)
        elif type == 'file':
            if not basename.startswith('data/') \
               and not basename.startswith('documentation/'):
                raise ValueError("Items of type %s must be"
                                 " in the data or documentation section"
                                 % type)
            basename += ext
        elif type == 'text':
            if not basename.startswith('documentation/'):
                raise ValueError("Items of type %s must be"
                                 " in the documentation section" % type)
    else:
        # Update existing item
        if mtime <= mod_time(item) and not force_update:
            if dry_run:
                sys.stdout.write("Skip %s: file %s is not newer\n"
                                 % (item.name, filename))
            return
        if type is not None and type != datatype(item):
            raise ValueError("Cannot change datatype %s to %s"
                             % (datatype(item), type))
        if type is None:
            type = datatype(item)
        if language is None:
            language = item.attrs.get('ACTIVE_PAPER_LANGUAGE', None)
        if dry_run:
            sys.stdout.write("Delete %s\n" % item.name)
        else:
            # Delete the old item so it can be recreated from the file.
            del item.parent[item.name.split('/')[-1]]
if dry_run:
    # Report what would be created, including the language when known.
    fulltype = type if language is None else '/'.join((type, language))
    sys.stdout.write("Create item %s of type %s from file %s\n"
                     % (basename, fulltype, filename))
else:
    if type in ['calclet', 'importlet', 'module']:
        # IMPROVEMENT: read through a context manager so the source
        # file is closed deterministically, and decode to text —
        # Python code items are stored as strings, not bytes (the
        # other variant of this code decodes as UTF-8 as well).
        with open(filename, 'rb') as source:
            code = source.read().decode('utf-8')
        # basename[5:] strips the leading "code/" section prefix.
        item = paper.store_python_code(basename[5:], code)
        stamp(item, type, {})
        timestamp(item, mtime)
    elif type in ['file', 'text']:
        f = paper.open_internal_file(basename, 'w')
        with open(filename, 'rb') as source:
            f.write(source.read())
        f.close()
        stamp(f._ds, type, {'ACTIVE_PAPER_LANGUAGE': language})
        timestamp(f._ds, mtime)
def calclets(self):
    """Return a mapping from item name to Calclet for every item of
    datatype 'calclet' stored in the paper."""
    return {item.name: Calclet(self, item)
            for item in self.iter_items()
            if datatype(item) == 'calclet'}