Beispiel #1
0
 def _wrap_and_track_dependencies(self, node):
     """Wrap an HDF5 node and record it as a dependency where applicable.

     A node of datatype 'reference' is resolved with ``dereference`` and
     wrapped without parent or codelet, so nothing is tracked for it.
     For any other node, when this group has a codelet attached, the node
     (or the enclosing data item, if there is one) is registered as a
     dependency, and so is the node's owner when that owner is a calclet.

     Returns a ``DataGroup`` for HDF5 groups and a ``DatasetWrapper``
     for everything else.
     """
     kind = datatype(node)

     if kind == 'reference':
         from activepapers.storage import dereference
         target_paper, target = dereference(node)
         if isinstance(target, h5py.Group):
             return DataGroup(target_paper, None, target, None, None)
         return DatasetWrapper(None, target, None)

     if self._codelet is not None:
         if not (kind is None or kind == "group"):
             if self._data_item is None:
                 dependency = node.name
             else:
                 dependency = self._data_item.name
             self._codelet.add_dependency(dependency)
         creator = owner(node)
         if creator is not None \
            and datatype(self._node[creator]) == "calclet":
             self._codelet.add_dependency(creator)

     if isinstance(node, h5py.Group):
         return DataGroup(self._paper, self, node,
                          self._codelet, self._data_item)
     return DatasetWrapper(self, node, self._codelet)
Beispiel #2
0
 def _getitem(self, item):
     """Return child *item* wrapped in an ``APNode``, following references.

     Both this node and the looked-up child are replaced by their
     dereferenced targets when their datatype is 'reference'. The name of
     the result is this node's name joined to *item* with a single '/'.
     """
     container = self._h5node
     if datatype(container) == 'reference':
         _, container = dereference(container)

     child = container[item]
     if datatype(child) == 'reference':
         _, child = dereference(child)

     prefix = self.name
     if not prefix.endswith('/'):
         prefix = prefix + '/'
     return APNode(child, prefix + item)
 def _getitem(self, item):
     """Look up *item* below this node and return it as an ``APNode``.

     Reference nodes are resolved with ``dereference`` both before and
     after the lookup; the returned node is named after this node's own
     name, not after the location of the dereferenced target.
     """
     def resolve(h5node):
         # A 'reference' node is replaced by the node it refers to.
         if datatype(h5node) == 'reference':
             _, h5node = dereference(h5node)
         return h5node

     target = resolve(resolve(self._h5node)[item])
     path = self.name
     if not path.endswith('/'):
         path += '/'
     path += item
     return APNode(target, path)
 def walk(group):
     """Yield every nested HDF5 group below *group*, parents first.

     Groups whose datatype is 'data' are neither yielded nor descended
     into; non-group nodes are skipped.
     """
     for child in group.values():
         if not isinstance(child, h5py.Group):
             continue
         if datatype(child) == 'data':
             continue
         yield child
         for descendant in walk(child):
             yield descendant
Beispiel #5
0
 def walk(group):
     """Generate all sub-groups of *group* in depth-first, pre-order.

     Only ``h5py.Group`` nodes whose datatype is not 'data' are produced,
     and only those are searched for further sub-groups.
     """
     # Stack of partially consumed child iterators; the top entry belongs
     # to the group currently being scanned.
     pending = [iter(group.values())]
     while pending:
         for child in pending[-1]:
             if isinstance(child, h5py.Group) \
                and datatype(child) != 'data':
                 yield child
                 # Descend into the child before resuming its siblings.
                 pending.append(iter(child.values()))
                 break
         else:
             # Current level exhausted: go back to the parent's iterator.
             pending.pop()
Beispiel #6
0
def ls(paper, long, type, pattern):
    """Write a listing of the items in an ActivePaper to standard output.

    paper   -- paper specification, resolved with ``get_paper`` and opened
               read-only
    long    -- if true, prefix each name with its timestamp, its datatype
               and a '*' marker for items reported stale by the paper
    type    -- if not None, list only items of exactly this datatype
               (items flagged as dummy datasets count as type 'dummy')
    pattern -- patterns passed through ``process_patterns``; when any are
               given, an item is listed only if one of them matches its
               name (without the leading slash)

    The paper is closed even when listing fails part-way.
    """
    paper = get_paper(paper)
    paper = activepapers.storage.ActivePaper(paper, 'r')
    try:
        pattern = process_patterns(pattern)
        field_len = len("importlet ")  # the longest data type name
        for item in paper.iter_items():
            name = item.name[1:]  # remove initial slash
            dtype = datatype(item)
            if item.attrs.get('ACTIVE_PAPER_DUMMY_DATASET', False):
                dtype = 'dummy'
            if pattern and \
               not any(p.match(name) for p in pattern):
                continue
            if type is not None and dtype != type:
                continue
            if long:
                t = item.attrs.get('ACTIVE_PAPER_TIMESTAMP', None)
                if t is None:
                    # Blank column as wide as a formatted timestamp.
                    sys.stdout.write(21 * " ")
                else:
                    sys.stdout.write(
                        time.strftime("%Y-%m-%d/%H:%M:%S  ",
                                      time.localtime(t / 1000.)))
                sys.stdout.write((dtype + field_len * " ")[:field_len])
                sys.stdout.write('*' if paper.is_stale(item) else ' ')
            sys.stdout.write(name)
            sys.stdout.write('\n')
    finally:
        # Release the underlying file even if listing raised.
        paper.close()
Beispiel #7
0
def ls(paper, long, type, pattern):
    """List the items stored in an ActivePaper on standard output.

    paper   -- paper specification handed to ``get_paper``; the resulting
               paper is opened in mode 'r'
    long    -- when true, each line also shows the item's timestamp (or
               blanks if it has none), its datatype, and '*' if the paper
               reports the item as stale
    type    -- restrict the listing to this datatype; None lists all.
               Items carrying ACTIVE_PAPER_DUMMY_DATASET are treated as
               datatype 'dummy'
    pattern -- processed by ``process_patterns``; if non-empty, only names
               matched by at least one pattern are listed

    The paper is always closed, including when an error interrupts the
    listing.
    """
    paper = get_paper(paper)
    paper = activepapers.storage.ActivePaper(paper, 'r')
    try:
        pattern = process_patterns(pattern)
        field_len = len("importlet ")  # the longest data type name
        for item in paper.iter_items():
            name = item.name[1:]  # remove initial slash
            dtype = datatype(item)
            if item.attrs.get('ACTIVE_PAPER_DUMMY_DATASET', False):
                dtype = 'dummy'
            if pattern and not any(p.match(name) for p in pattern):
                continue
            if type is not None and dtype != type:
                continue
            if long:
                t = item.attrs.get('ACTIVE_PAPER_TIMESTAMP', None)
                if t is None:
                    # Same width as the formatted timestamp below.
                    sys.stdout.write(21 * " ")
                else:
                    sys.stdout.write(
                        time.strftime("%Y-%m-%d/%H:%M:%S  ",
                                      time.localtime(t / 1000.)))
                sys.stdout.write((dtype + field_len * " ")[:field_len])
                sys.stdout.write('*' if paper.is_stale(item) else ' ')
            sys.stdout.write(name)
            sys.stdout.write('\n')
    finally:
        # Do not leak the open paper when an exception escapes the loop.
        paper.close()
 def __init__(self, paper, node):
     """Store the paper and node and cache the node's metadata and code.

     The datatype, language and name are read from *node*; the code text
     is the first element of the node's data converted with ``utf8``.
     """
     self._paper, self._node = paper, node
     self.type = datatype(node)
     self.language = language(node)
     self.name = node.name
     raw_code = node[...].flat[0]
     self.code = utf8(raw_code)
Beispiel #9
0
def checkout(paper, type, pattern, dry_run):
    """Extract items of an ActivePaper into the current directory.

    paper   -- paper specification, resolved with ``get_paper`` and opened
               read-only
    type    -- if not None, extract only items of exactly this datatype
    pattern -- patterns passed through ``process_patterns``; when any are
               given, an item is extracted only if one of them matches its
               name (without the leading slash)
    dry_run -- if true, only report on standard output which items would
               be extracted; no file is written

    Items whose datatype cannot be extracted are reported on standard
    error and skipped. The paper is closed even if extraction fails.
    """
    paper = get_paper(paper)
    paper = activepapers.storage.ActivePaper(paper, 'r')
    try:
        pattern = process_patterns(pattern)
        for item in paper.iter_items():
            name = item.name[1:]  # remove initial slash
            dtype = datatype(item)
            if pattern and \
               not any(p.match(name) for p in pattern):
                continue
            if type is not None and dtype != type:
                continue
            if dry_run:
                # A dry run must not touch the file system.
                sys.stdout.write("Extract %s\n" % item.name)
                continue
            try:
                extract_to_file(paper, item, directory=os.getcwd())
            except ValueError:
                sys.stderr.write("Skipping %s: data type %s not extractable\n"
                                 % (item.name, datatype(item)))
    finally:
        paper.close()
Beispiel #10
0
def checkout(paper, type, pattern, dry_run):
    """Write the selected items of an ActivePaper to files in the cwd.

    paper   -- paper specification handed to ``get_paper``; the resulting
               paper is opened in mode 'r'
    type    -- restrict extraction to this datatype; None selects all
    pattern -- processed by ``process_patterns``; if non-empty, only items
               whose name is matched by at least one pattern are selected
    dry_run -- when true, the selected items are only named on standard
               output and nothing is written to disk

    A ``ValueError`` from ``extract_to_file`` is reported on standard error
    and the item is skipped. The paper is always closed on exit.
    """
    paper = get_paper(paper)
    paper = activepapers.storage.ActivePaper(paper, 'r')
    try:
        pattern = process_patterns(pattern)
        for item in paper.iter_items():
            name = item.name[1:]  # remove initial slash
            dtype = datatype(item)
            if pattern and not any(p.match(name) for p in pattern):
                continue
            if type is not None and dtype != type:
                continue
            if dry_run:
                # Honour the flag: report instead of creating files.
                sys.stdout.write("Extract %s\n" % item.name)
                continue
            try:
                extract_to_file(paper, item, directory=os.getcwd())
            except ValueError:
                sys.stderr.write(
                    "Skipping %s: data type %s not extractable\n" %
                    (item.name, datatype(item)))
    finally:
        paper.close()
Beispiel #11
0
 def owned(group):
     """Return the names of all nodes below *group* owned by ``codelet``.

     A node owned by the codelet is listed and not searched further.
     Other nodes are searched recursively when they are HDF5 groups whose
     datatype is not 'data'.
     """
     names = []
     for child in group.values():
         if owner(child) == codelet:
             names.append(child.name)
             continue
         if isinstance(child, h5py.Group) and datatype(child) != 'data':
             names += owned(child)
     return names
 def owned(group):
     """Collect the names of the nodes under *group* that ``codelet`` owns.

     The search descends only into ``h5py.Group`` nodes that are not owned
     by the codelet and whose datatype is not 'data'.
     """
     found = []

     def visit(container):
         # Append matches to the shared list in traversal order.
         for entry in container.values():
             if owner(entry) == codelet:
                 found.append(entry.name)
             elif isinstance(entry, h5py.Group) \
                and datatype(entry) != 'data':
                 visit(entry)

     visit(group)
     return found
Beispiel #13
0
def extract_to_file(paper, item, file=None, filename=None, directory=None):
    """Write the contents of *item* to a file and return the file name.

    paper     -- not used by this function
    item      -- the dataset to extract
    file      -- an already open, writable binary file object. If given,
                 *filename* and *directory* must both be None and the file
                 is left open for the caller.
    filename  -- path of the file to create. If None, the path is built
                 from *directory* and the item's name, with an extension
                 looked up in ``file_extensions`` when the last name
                 component contains no '.'.
    directory -- directory that must contain *filename*, or in which the
                 file is created when *filename* is None

    Returns the absolute file name, or None when *file* was given.

    Raises ValueError if *filename* lies outside *directory* or if the
    item's datatype cannot be extracted.
    """
    if file is None:
        if filename is not None:
            filename = os.path.abspath(filename)
        if directory is not None:
            directory = os.path.abspath(directory)
        if filename is not None and directory is not None:
            # Compare against the directory plus a trailing separator so
            # that "/tmp/foobar/x" is not accepted for "/tmp/foo".
            if not filename.startswith(os.path.join(directory, '')):
                raise ValueError("%s not in directory %s" %
                                 (filename, directory))
        if filename is None:
            item_name = item.name.split('/')[1:]
            filename = os.path.join(directory, *item_name)
            if '.' not in item_name[-1]:
                # Add a file extension using some heuristics
                language = item.attrs.get('ACTIVE_PAPER_LANGUAGE', None)
                filename += file_extensions.get((datatype(item), language), '')
        directory, _ = os.path.split(filename)
        if directory and not os.path.exists(directory):
            os.makedirs(directory)
        file = open(filename, 'wb')
        close = True
    else:
        # If a file object is given, no other file specification is allowed
        assert filename is None
        assert directory is None
        close = False
    try:
        dt = datatype(item)
        if dt in ['file', 'text']:
            internal = activepapers.storage.InternalFile(item, 'rb')
            file.write(internal.read())
        elif dt in extractable_types:
            file.write(item[...].flat[0])
        else:
            raise ValueError("cannot extract dataset %s of type %s" %
                             (item.name, dt))
    finally:
        # Close a file opened here even when extraction fails.
        if close:
            file.close()
    if close:
        # Carry the item's modification time over to the extracted file.
        mtime = mod_time(item)
        if mtime:
            os.utime(filename, (mtime, mtime))
    return filename
Beispiel #14
0
def extract_to_file(paper, item, file=None, filename=None, directory=None):
    """Extract the dataset *item* into a file; return the file's name.

    paper     -- not used by this function
    item      -- the dataset whose contents are written
    file      -- open binary file object to write to. When supplied,
                 *filename* and *directory* must be None, and the caller
                 keeps responsibility for closing the file.
    filename  -- target path. When None it is derived from *directory* and
                 the item's name; if the last name component has no '.',
                 an extension from ``file_extensions`` is appended.
    directory -- directory the target must be located in

    Returns the absolute path written to (None if *file* was supplied).

    Raises ValueError when *filename* is not inside *directory*, or when
    the item's datatype is not extractable.
    """
    if file is None:
        if filename is not None:
            filename = os.path.abspath(filename)
        if directory is not None:
            directory = os.path.abspath(directory)
        if filename is not None and directory is not None:
            # A plain prefix test would also accept sibling directories
            # such as "/tmp/foobar" for "/tmp/foo"; require the separator.
            if not filename.startswith(os.path.join(directory, '')):
                raise ValueError("%s not in directory %s"
                                 % (filename, directory))
        if filename is None:
            item_name = item.name.split('/')[1:]
            filename = os.path.join(directory, *item_name)
            if '.' not in item_name[-1]:
                # Add a file extension using some heuristics
                language = item.attrs.get('ACTIVE_PAPER_LANGUAGE', None)
                filename += file_extensions.get((datatype(item), language), '')
        directory, _ = os.path.split(filename)
        if directory and not os.path.exists(directory):
            os.makedirs(directory)
        file = open(filename, 'wb')
        close = True
    else:
        # If a file object is given, no other file specification is allowed
        assert filename is None
        assert directory is None
        close = False
    try:
        dt = datatype(item)
        if dt in ['file', 'text']:
            internal = activepapers.storage.InternalFile(item, 'rb')
            file.write(internal.read())
        elif dt in extractable_types:
            file.write(item[...].flat[0])
        else:
            raise ValueError("cannot extract dataset %s of type %s"
                             % (item.name, dt))
    finally:
        # Never leave a file opened by this function dangling.
        if close:
            file.close()
    if close:
        # Give the extracted file the item's modification time.
        mtime = mod_time(item)
        if mtime:
            os.utime(filename, (mtime, mtime))
    return filename
Beispiel #15
0
 def __init__(self, paper, parent, h5group, codelet, data_item=None):
     """Wrap the HDF5 group *h5group* belonging to *paper*.

     Without a *parent*, the group acts as its own parent. Without a
     *data_item*, the group becomes its own data item when its datatype
     is "data". The group's attributes (through ``AttrWrapper``),
     reference and name are exposed on the wrapper.
     """
     if parent is None:
         parent = self
     if data_item is None and datatype(h5group) == "data":
         data_item = self
     self._paper = paper
     self._parent = parent
     self._node = h5group
     self._codelet = codelet
     self._data_item = data_item
     self.attrs = AttrWrapper(h5group)
     self.ref = h5group.ref
     self.name = h5group.name
 def process(node, refs):
     """Record the external papers that *node* and its descendants use.

     For a node of datatype 'reference', the referenced path is added to
     ``refs[paper_ref][0]``. For a node carrying the attribute
     ACTIVE_PAPER_COPIED_FROM, the source path is added to
     ``refs[paper_ref][1]``. HDF5 groups are processed recursively.

     Returns *refs*, which is updated in place.
     """
     if datatype(node) == 'reference':
         paper_ref, ref_path = node[()]
         refs[paper_ref][0].add(ref_path)
     elif 'ACTIVE_PAPER_COPIED_FROM' in node.attrs:
         source = node.attrs['ACTIVE_PAPER_COPIED_FROM']
         paper_ref, ref_path = source
         if h5py.version.version_tuple[:2] <= (2, 2):
             # h5py 2.2 returns a wrong dtype; only there do the two
             # values have to be unpacked through .flat
             paper_ref = paper_ref.flat[0]
             ref_path = ref_path.flat[0]
         refs[paper_ref][1].add(ref_path)
     if isinstance(node, h5py.Group):
         for item in node:
             process(node[item], refs)
     return refs
 def replace_by_dummy(self, item_name):
     """Replace the item *item_name* by a zero-dimensional placeholder.

     The placeholder is an integer dataset holding 0 that keeps the
     original item's datatype, generating codelet, dependencies and
     modification time, and is flagged with ACTIVE_PAPER_DUMMY_DATASET.

     Fails with AssertionError when the item has no owning codelet.
     """
     item = self.file[item_name]
     codelet = owner(item)
     assert codelet is not None
     dtype = datatype(item)
     mtime = mod_time(item)
     # NOTE(review): list(deps) below assumes the attribute is present;
     # confirm that every codelet-owned item carries it.
     deps = item.attrs.get('ACTIVE_PAPER_DEPENDENCIES')
     del self.file[item_name]
     # np.int was only an alias of the builtin int and is gone from
     # current NumPy releases; the builtin gives the same dtype.
     ds = self.file.create_dataset(item_name,
                                   data=np.zeros((), dtype=int))
     stamp(ds, dtype,
           dict(ACTIVE_PAPER_GENERATING_CODELET=codelet,
                ACTIVE_PAPER_DEPENDENCIES=list(deps)))
     timestamp(ds, mtime)
     ds.attrs['ACTIVE_PAPER_DUMMY_DATASET'] = True
Beispiel #18
0
 def track_and_check_import(self, module_name):
     """Check that *module_name* may be imported and track it if local.

     'activepapers.contents' is always accepted and never tracked. A
     module found in the paper (directly, or through the '__init__' entry
     of a node that is not itself a module) is added as a dependency when
     it is stored in this paper. A module not found in the paper must
     have its top-level package among the paper's dependencies, the
     allowed standard-library modules, or numpy/h5py.

     Raises ImportError for a module that is not allowed.
     """
     if module_name == 'activepapers.contents':
         return

     node = self.paper.get_local_module(module_name)
     if node is not None:
         if datatype(node) != "module":
             node = node.get("__init__", None)
         if node is not None and node.in_paper(self.paper):
             self.add_dependency(node.name)
         return

     top_level = module_name.split('.')[0]
     permitted = top_level in self.paper.dependencies \
         or top_level in activepapers.standardlib.allowed_modules \
         or top_level in ['numpy', 'h5py']
     if not permitted:
         raise ImportError("import of %s not allowed" % module_name)
Beispiel #19
0
 def replace_by_dummy(self, item_name):
     """Substitute a dummy dataset for the item stored at *item_name*.

     The item is deleted and recreated as a zero-dimensional integer
     dataset with value 0. Its datatype, generating codelet, dependency
     list and modification time are carried over, and the attribute
     ACTIVE_PAPER_DUMMY_DATASET is set to True.

     An item without an owning codelet triggers an AssertionError.
     """
     item = self.file[item_name]
     codelet = owner(item)
     assert codelet is not None
     dtype = datatype(item)
     mtime = mod_time(item)
     # NOTE(review): assumes ACTIVE_PAPER_DEPENDENCIES exists on the item
     # (list(None) would fail below) -- verify against callers.
     deps = item.attrs.get('ACTIVE_PAPER_DEPENDENCIES')
     del self.file[item_name]
     # Use the builtin int: the np.int alias has been removed from NumPy
     # and always meant the same type.
     ds = self.file.create_dataset(item_name,
                                   data=np.zeros((), dtype=int))
     stamp(ds, dtype,
           dict(ACTIVE_PAPER_GENERATING_CODELET=codelet,
                ACTIVE_PAPER_DEPENDENCIES=list(deps)))
     timestamp(ds, mtime)
     ds.attrs['ACTIVE_PAPER_DUMMY_DATASET'] = True
    def run_codelet(self, path, debug=False):
        """Run the codelet stored at *path* and report any failure.

        path  -- codelet location, either relative to the code group or
                 absolute, in which case it must start with '/code/'
        debug -- if true, a failure prints the traceback to standard error
                 and starts the post-mortem debugger

        Returns None when the codelet ran without error (and after a
        debugging session), otherwise the formatted traceback text.
        """
        if path.startswith('/'):
            assert path.startswith('/code/')
            path = path[6:]
        node = APNode(self.code_group)[path]
        class_ = {'calclet': Calclet, 'importlet': Importlet}[datatype(node)]
        try:
            class_(self, node).run()
            return None
        except Exception:
            # TODO: preprocess traceback to show only the stack frames
            #       in the codelet.
            import traceback

            exc_type, value, trace = sys.exc_info()
            stack = traceback.extract_tb(trace)
            del trace

            # Drop the frames up to and including 'execstring'. Stop when
            # the stack runs empty so that a failure raised before the
            # codelet is executed cannot end in an IndexError here.
            while stack:
                if stack[0][2] == 'execstring':
                    del stack[0]
                    break
                del stack[0]

            fstack = []
            for filename, lineno, fn_name, code in stack:
                if ':' in filename:
                    # Codelet frames are named "<paper id>:<codelet path>";
                    # show the paper's file name instead of the id.
                    paper_id, codelet = filename.split(':')
                    paper = paper_registry.get(paper_id)
                    if paper is None:
                        paper_name = '<ActivePaper>'
                    else:
                        paper_name = '<%s>' % paper.file.filename
                    filename = ':'.join([paper_name, codelet])
                    if code is None and paper is not None:
                        # Fetch the source line from the stored script.
                        script = paper.file[codelet][...].flat[0]
                        code = script.split('\n')[lineno-1]
                fstack.append((filename, lineno, fn_name, code))

            tb_text = ''.join(["Traceback (most recent call last):\n"] +
                              traceback.format_list(fstack) +
                              traceback.format_exception_only(exc_type,
                                                              value))
            if debug:
                sys.stderr.write(tb_text)
                import pdb
                pdb.post_mortem()
            else:
                return tb_text
Beispiel #21
0
    def run_codelet(self, path, debug=False):
        """Execute the codelet at *path*; return traceback text on failure.

        An absolute *path* must start with '/code/' and is made relative
        to the code group. A successful run returns None. On failure the
        traceback is reduced to the frames after 'execcode', codelet
        frames are relabelled with the paper's file name, and the text is
        returned -- or, with *debug* set, written to standard error before
        the post-mortem debugger is started.
        """
        if path.startswith('/'):
            assert path.startswith('/code/')
            path = path[6:]
        node = APNode(self.code_group)[path]
        class_ = {'calclet': Calclet, 'importlet': Importlet}[datatype(node)]
        try:
            class_(self, node).run()
            return None
        except Exception:
            # TODO: preprocess traceback to show only the stack frames
            #       in the codelet.
            import traceback

            exc_type, exc_value, exc_tb = sys.exc_info()
            frames = traceback.extract_tb(exc_tb)
            del exc_tb

            # Discard leading frames up to and including 'execcode'.
            while frames:
                dropped = frames.pop(0)
                if dropped[2] == 'execcode':
                    break

            shown = []
            for filename, lineno, fn_name, code in frames:
                if ':' in filename:
                    paper_id, codelet = filename.split(':')
                    paper = paper_registry.get(paper_id)
                    if paper is not None:
                        paper_name = '<%s>' % paper.file.filename
                    else:
                        paper_name = '<ActivePaper>'
                    filename = paper_name + ':' + codelet
                    if code is None and paper is not None:
                        # Look the source line up in the stored script.
                        script = utf8(paper.file[codelet][...].flat[0])
                        code = script.split('\n')[lineno - 1]
                shown.append((filename, lineno, fn_name, code))

            pieces = ["Traceback (most recent call last):\n"]
            pieces.extend(traceback.format_list(shown))
            pieces.extend(traceback.format_exception_only(exc_type,
                                                          exc_value))
            tb_text = ''.join(pieces)
            if not debug:
                return tb_text
            sys.stderr.write(tb_text)
            import pdb
            pdb.post_mortem()
Beispiel #22
0
 def process(node, refs):
     """Add the external references of *node* and its subtree to *refs*.

     Slot 0 of ``refs[paper_ref]`` collects paths used through nodes of
     datatype 'reference'; slot 1 collects paths named by the attribute
     ACTIVE_PAPER_COPIED_FROM. Returns *refs*.
     """
     slot = None
     if datatype(node) == 'reference':
         paper_ref, ref_path = node[()]
         slot = 0
     elif 'ACTIVE_PAPER_COPIED_FROM' in node.attrs:
         paper_ref, ref_path = node.attrs['ACTIVE_PAPER_COPIED_FROM']
         if h5py.version.version_tuple[:2] <= (2, 2):
             # h5py 2.2 returns a wrong dtype
             paper_ref, ref_path = paper_ref.flat[0], ref_path.flat[0]
         slot = 1
     if slot is not None:
         refs[paper_ref][slot].add(ref_path)

     if isinstance(node, h5py.Group):
         for child_name in node:
             process(node[child_name], refs)
     return refs
Beispiel #23
0
 def process(node, refs):
     """Walk *node* recursively and note which other papers it draws on.

     A 'reference' node contributes its target path to the first set of
     ``refs[paper_ref]``; a node with an ACTIVE_PAPER_COPIED_FROM attribute
     contributes the source path to the second set. The updated *refs* is
     returned.
     """
     is_reference = datatype(node) == 'reference'
     if is_reference:
         paper_ref, ref_path = node[()]
         refs[paper_ref][0].add(ref_path)
     if not is_reference and 'ACTIVE_PAPER_COPIED_FROM' in node.attrs:
         paper_ref, ref_path = node.attrs['ACTIVE_PAPER_COPIED_FROM']
         # h5py 2.2 returns a wrong dtype
         needs_unpacking = h5py.version.version_tuple[:2] <= (2, 2)
         if needs_unpacking:
             paper_ref = paper_ref.flat[0]
             ref_path = ref_path.flat[0]
         refs[paper_ref][1].add(ref_path)
     if not isinstance(node, h5py.Group):
         return refs
     for key in node:
         process(node[key], refs)
     return refs
Beispiel #24
0
 def find_module(self, fullname, path=None):
     """Return a ``ModuleLoader`` for *fullname*, or None if not found.

     The module is looked up in the paper returned by
     ``get_codelet_and_paper``. A group counts as a package only when it
     contains '__init__', which is then the node that gets loaded. None
     is returned when there is no current paper, no matching node, or
     the node is not a module written in Python. *path* is ignored.
     """
     _, paper = get_codelet_and_paper()
     if paper is None:
         return None

     node = paper.get_local_module(fullname)
     if node is None:
         # No corresponding node found
         return None

     is_package = bool(node.is_group())
     if is_package:
         # Node is a group, so this should be a package
         if '__init__' not in node:
             # Not a package
             return None
         node = node['__init__']

     is_python_module = datatype(node) == "module" \
         and node.attrs.get("ACTIVE_PAPER_LANGUAGE", None) == "python"
     if not is_python_module:
         # Node found but is not a Python module
         return None
     return ModuleLoader(paper, fullname, node, is_package)
def dereference(ref_node):
    """Resolve a node of datatype 'reference' to ``(paper, node)``.

    The reference holds a paper reference and a path; the paper is opened
    with ``open_paper_ref`` and the node at that path is looked up in it.
    """
    assert datatype(ref_node) == 'reference'
    reference = ref_node[()]
    paper_ref, path = reference
    target_paper = open_paper_ref(ascii(paper_ref))
    return target_paper, target_paper.file[path]
Beispiel #26
0
def update_from_file(paper,
                     filename,
                     type=None,
                     force_update=False,
                     dry_run=False,
                     dataset_name=None,
                     create_new=True):
    """Create or replace an item of *paper* from the file *filename*.

    paper        -- the paper to update
    filename     -- path of the source file; it is also used as the item
                    name (with or without its extension) unless
                    *dataset_name* is given
    type         -- datatype of the item; required for new items and, for
                    existing ones, must match the stored datatype
    force_update -- replace an existing item even if the file is not newer
    dry_run      -- only describe on standard output what would be done
    dataset_name -- explicit name of the item to look up
    create_new   -- if false, return without action when the item does
                    not exist yet

    Raises ValueError if the file does not exist, if a new item lacks a
    datatype or sits in the wrong section of the paper, or if *type*
    differs from the datatype of the existing item.
    """
    if not os.path.exists(filename):
        raise ValueError("File %s not found" % filename)
    mtime = os.path.getmtime(filename)
    basename = filename
    ext = ''
    if dataset_name is not None:
        item = paper.file.get(dataset_name, None)
        if item is not None:
            basename = item.name
    else:
        item = paper.file.get(basename, None)
        if item is None:
            # Retry with the file extension stripped from the item name.
            basename, ext = os.path.splitext(filename)
            item = paper.file.get(basename, None)
    language = file_languages.get(ext, None)
    if item is None:
        if not create_new:
            return
        # Create new item
        if type is None:
            raise ValueError("Datatype required to create new item %s" %
                             basename)
        if type in ['calclet', 'importlet', 'module']:
            if not basename.startswith('code/'):
                raise ValueError("Items of type %s must be"
                                 " in the code section" % type)
            if language != 'python':
                raise ValueError("Items of type %s must be Python code" % type)
            if type == 'module' and \
               not basename.startswith('code/python-packages/'):
                raise ValueError("Items of type %s must be in"
                                 " code/python-packages" % type)
        elif type == 'file':
            if not basename.startswith('data/') \
               and not basename.startswith('documentation/'):
                raise ValueError("Items of type %s must be"
                                 " in the data or documentation section" %
                                 type)
            basename += ext
        elif type == 'text':
            if not basename.startswith('documentation/'):
                raise ValueError("Items of type %s must be"
                                 " in the documentation section" % type)
    else:
        # Update existing item
        if mtime <= mod_time(item) and not force_update:
            if dry_run:
                sys.stdout.write("Skip %s: file %s is not newer\n" %
                                 (item.name, filename))
            return
        if type is not None and type != datatype(item):
            raise ValueError("Cannot change datatype %s to %s" %
                             (datatype(item), type))
        if type is None:
            type = datatype(item)
        if language is None:
            language = item.attrs.get('ACTIVE_PAPER_LANGUAGE', None)
        if dry_run:
            sys.stdout.write("Delete %s\n" % item.name)
        else:
            del item.parent[item.name.split('/')[-1]]
    if dry_run:
        fulltype = type if language is None else '/'.join((type, language))
        sys.stdout.write("Create item %s of type %s from file %s\n" %
                         (basename, fulltype, filename))
    else:
        if type in ['calclet', 'importlet', 'module']:
            with open(filename, 'rb') as source:
                code = source.read().decode('utf-8')
            # basename[5:] strips the leading 'code/'.
            item = paper.store_python_code(basename[5:], code)
            stamp(item, type, {})
            timestamp(item, mtime)
        elif type in ['file', 'text']:
            with open(filename, 'rb') as source:
                contents = source.read()
            f = paper.open_internal_file(basename, 'w')
            try:
                f.write(contents)
            finally:
                f.close()
            stamp(f._ds, type, {'ACTIVE_PAPER_LANGUAGE': language})
            timestamp(f._ds, mtime)
Beispiel #27
0
def update_from_file(paper, filename, type=None,
                     force_update=False, dry_run=False,
                     dataset_name=None, create_new=True):
    """Store the contents of *filename* as an item of *paper*.

    paper        -- the paper receiving the item
    filename     -- source file; unless *dataset_name* is given, the item
                    is looked up under this name, then under the name
                    without its extension
    type         -- item datatype; mandatory for new items, and it must
                    agree with the datatype of an existing item
    force_update -- overwrite an existing item even when the file is not
                    newer than the item
    dry_run      -- print the planned actions instead of performing them
    dataset_name -- name of the item to look up instead of *filename*
    create_new   -- when false, a missing item is silently left missing

    Raises ValueError for a missing file, a new item without datatype or
    outside the section required for its type, or an attempt to change
    the datatype of an existing item.
    """
    if not os.path.exists(filename):
        raise ValueError("File %s not found" % filename)
    mtime = os.path.getmtime(filename)
    basename = filename
    ext = ''
    if dataset_name is not None:
        item = paper.file.get(dataset_name, None)
        if item is not None:
            basename = item.name
    else:
        item = paper.file.get(basename, None)
        if item is None:
            # Second attempt: item name without the file extension.
            basename, ext = os.path.splitext(filename)
            item = paper.file.get(basename, None)
    language = file_languages.get(ext, None)
    if item is None:
        if not create_new:
            return
        # Create new item
        if type is None:
            raise ValueError("Datatype required to create new item %s"
                             % basename)
        if type in ['calclet', 'importlet', 'module']:
            if not basename.startswith('code/'):
                raise ValueError("Items of type %s must be"
                                 " in the code section"
                                 % type)
            if language != 'python':
                raise ValueError("Items of type %s must be Python code"
                                 % type)
            if type == 'module' and \
               not basename.startswith('code/python-packages/'):
                raise ValueError("Items of type %s must be in"
                                 " code/python-packages"
                                 % type)
        elif type == 'file':
            if not basename.startswith('data/') \
               and not basename.startswith('documentation/'):
                raise ValueError("Items of type %s must be"
                                 " in the data or documentation section"
                                 % type)
            basename += ext
        elif type == 'text':
            if not basename.startswith('documentation/'):
                raise ValueError("Items of type %s must be"
                                 " in the documentation section"
                                 % type)
    else:
        # Update existing item
        if mtime <= mod_time(item) and not force_update:
            if dry_run:
                sys.stdout.write("Skip %s: file %s is not newer\n"
                                 % (item.name, filename))
            return
        if type is not None and type != datatype(item):
            raise ValueError("Cannot change datatype %s to %s"
                             % (datatype(item), type))
        if type is None:
            type = datatype(item)
        if language is None:
            language = item.attrs.get('ACTIVE_PAPER_LANGUAGE', None)
        if dry_run:
            sys.stdout.write("Delete %s\n" % item.name)
        else:
            del item.parent[item.name.split('/')[-1]]
    if dry_run:
        fulltype = type if language is None else '/'.join((type, language))
        sys.stdout.write("Create item %s of type %s from file %s\n"
                         % (basename, fulltype, filename))
    else:
        if type in ['calclet', 'importlet', 'module']:
            with open(filename, 'rb') as source:
                code = source.read()
            # basename[5:] strips the leading 'code/'.
            item = paper.store_python_code(basename[5:], code)
            stamp(item, type, {})
            timestamp(item, mtime)
        elif type in ['file', 'text']:
            with open(filename, 'rb') as source:
                contents = source.read()
            f = paper.open_internal_file(basename, 'w')
            try:
                f.write(contents)
            finally:
                f.close()
            stamp(f._ds, type, {'ACTIVE_PAPER_LANGUAGE': language})
            timestamp(f._ds, mtime)
Beispiel #28
0
 def calclets(self):
     """Return a dict mapping item names to ``Calclet`` objects.

     Only items whose datatype is 'calclet' are included.
     """
     return {item.name: Calclet(self, item)
             for item in self.iter_items()
             if datatype(item) == 'calclet'}
Beispiel #29
0
def dereference(ref_node):
    """Follow the reference stored in *ref_node*.

    Returns a pair made of the paper opened through ``open_paper_ref``
    and the node found in that paper at the referenced path. *ref_node*
    must have datatype 'reference'.
    """
    assert datatype(ref_node) == 'reference'
    paper_ref, path = ref_node[()]
    paper = open_paper_ref(ascii(paper_ref))
    node = paper.file[path]
    return (paper, node)
 def calclets(self):
     """Build a name -> ``Calclet`` mapping for all calclet items.

     Items with any other datatype are left out.
     """
     by_name = {}
     for item in self.iter_items():
         if datatype(item) != 'calclet':
             continue
         by_name[item.name] = Calclet(self, item)
     return by_name