def rebuild_registry(self, registry_path, variant, proc_aligned=False):
    """Regenerate the registry file stored at ``registry_path``.

    A ``RegistryConf`` is loaded for the corpus whose id is the base name
    of ``registry_path`` and its serialized form (with heading) is written
    to that path, encoded with the configuration's own ``encoding``.

    Arguments:
    registry_path -- target file; its base name is used as the corpus id
    variant -- passed through to RegistryConf
    proc_aligned -- if True, every entry of the loaded configuration's
                    ``aligned`` list is rebuilt too, as a sibling file in
                    the same directory (one level only; the nested calls
                    run with the default proc_aligned=False)
    """
    logging.getLogger(__name__).info('Rebuilding registry {0}'.format(registry_path))
    rc = RegistryConf(corpus_id=os.path.basename(registry_path), variant=variant,
                      backend=self._backend)
    rc.load()

    target_dir = os.path.dirname(registry_path)
    # dirname is '' for a bare file name and os.makedirs('') raises,
    # so only create the directory when there actually is one to create.
    if target_dir and not os.path.exists(target_dir):
        os.makedirs(target_dir)

    # Serialize before opening the file: opening truncates it, so a failure
    # during serialization must not leave an empty registry behind.
    s = RegModelSerializer(add_heading=True)
    payload = s.serialize(rc).encode(rc.encoding)

    # The payload is already encoded, hence binary mode (a text-mode handle
    # rejects bytes on Python 3 and may translate newlines on Windows).
    with open(registry_path, 'wb') as fw:
        fw.write(payload)

    if proc_aligned:
        for aligned in rc.aligned:
            self.rebuild_registry(os.path.join(target_dir, aligned), variant)
def rebuild_registry(self, registry_path, variant, proc_aligned=False):
    """Regenerate the registry file stored at ``registry_path``.

    A ``RegistryConf`` is loaded for the corpus whose id is the base name
    of ``registry_path`` and its serialized form (with heading) is written
    to that path, encoded with the configuration's own ``encoding``.

    Arguments:
    registry_path -- target file; its base name is used as the corpus id
    variant -- passed through to RegistryConf
    proc_aligned -- if True, every entry of the loaded configuration's
                    ``aligned`` list is rebuilt too, as a sibling file in
                    the same directory (one level only; the nested calls
                    run with the default proc_aligned=False)
    """
    logging.getLogger(__name__).info(
        'Rebuilding registry {0}'.format(registry_path))
    rc = RegistryConf(corpus_id=os.path.basename(registry_path),
                      variant=variant, backend=self._backend)
    rc.load()

    target_dir = os.path.dirname(registry_path)
    # dirname is '' for a bare file name and os.makedirs('') raises,
    # so only create the directory when there actually is one to create.
    if target_dir and not os.path.exists(target_dir):
        os.makedirs(target_dir)

    # Serialize before opening the file: opening truncates it, so a failure
    # during serialization must not leave an empty registry behind.
    s = RegModelSerializer(add_heading=True)
    payload = s.serialize(rc).encode(rc.encoding)

    # The payload is already encoded, hence binary mode (a text-mode handle
    # rejects bytes on Python 3 and may translate newlines on Windows).
    with open(registry_path, 'wb') as fw:
        fw.write(payload)

    if proc_aligned:
        for aligned in rc.aligned:
            self.rebuild_registry(
                os.path.join(target_dir, aligned), variant)
def load_registry(corpus_id, variant, backend):
    """Load a corpus registry configuration and return it serialized.

    Arguments:
    corpus_id -- passed to RegistryConf as ``corpus_id``
    variant -- passed to RegistryConf as ``variant``
    backend -- passed to RegistryConf as ``backend``

    Returns whatever ``RegModelSerializer().serialize`` produces for the
    loaded configuration (default serializer options).
    """
    registry = RegistryConf(corpus_id=corpus_id, variant=variant, backend=backend)
    registry.load()
    serializer = RegModelSerializer()
    return serializer.serialize(registry)
def __init__(self, corpus_id, variant, tokens, backend):
    """Set up parser state for a single token sequence.

    Arguments:
    corpus_id, variant, backend -- forwarded positionally to RegistryConf,
                                   which collects the parsed items
    tokens -- the token sequence to be processed; stored as given
    """
    # counter used for the ``position`` of positional attributes; starts at 0
    self._posattr_idx = 0
    self._items = RegistryConf(corpus_id, variant, backend)
    self._tokens = tokens
class Parser(object):
    """Token-driven state machine producing a RegistryConf.

    Every ``state_N`` method receives the current token together with the
    object currently being built and answers with a
    ``(next_state, object)`` pair. Calling the instance runs the machine
    over all tokens and returns the filled RegistryConf.

    The '$' token acts as a separator between entries (presumably an
    end-of-line marker produced by the tokenizer - confirm there) and
    tokens starting with '#' open a comment that lasts until the next '$'.
    """

    def __init__(self, corpus_id, variant, tokens, backend):
        # counter used for the ``position`` of top-level ATTRIBUTE items
        self._posattr_idx = 0
        self._items = RegistryConf(corpus_id, variant, backend)
        self._tokens = tokens

    @staticmethod
    def is_key(s):
        """Return a match object if ``s`` starts with an upper-case
        ASCII letter, None otherwise."""
        return re.match(r'[A-Z]+', s)

    @staticmethod
    def is_value(s):
        """Tell whether ``s`` is something other than '{', '}' or '#'."""
        return s not in ('{', '}', '#')

    @watchable
    def state_0(self, token, obj):
        """Top level: expects a separator, a comment or a key."""
        if token == '$':
            return self.state_0, obj
        if token.startswith('#'):
            return self.state_3, obj
        if not self.is_key(token):
            raise RegistrySyntaxError(u'in state 0 cannot process: {0}'.format(token))

        # a new key starts here, so the previous top-level item is complete
        if obj:
            self._items.add_item(obj)
        if token == 'ATTRIBUTE':
            new_attr = PosAttribute(position=self._posattr_idx, name=token)
            self._posattr_idx += 1
            return self.state_1, new_attr
        if token == 'STRUCTURE':
            return self.state_1, Struct(token)
        return self.state_1, SimpleAttr(token)

    @watchable
    def state_1(self, token, obj):
        """Top level, after a key: expects the value (or the name)."""
        if not self.is_value(token):
            raise RegistrySyntaxError(u'in state 1 cannot process: {0}'.format(token))
        if isinstance(obj, SimpleAttr):
            obj.value = token
        else:
            obj.name = token
        return self.state_2, obj

    @watchable
    def state_2(self, token, obj):
        """Top level, after a value: the entry ends or a block opens."""
        if token == '$':
            return self.state_0, obj
        if token == '{':
            return self.state_4, obj
        # any other token: no transition is defined
        return None

    @watchable
    def state_3(self, token, obj):
        """Top-level comment: skip everything up to the next '$'."""
        following = self.state_0 if token == '$' else self.state_3
        return following, obj

    def state_3b(self, token, obj):
        """Comment inside a first-level block: skip up to the next '$'."""
        following = self.state_4 if token == '$' else self.state_3b
        return following, obj

    def state_3c(self, token, obj):
        """Comment inside a second-level block: skip up to the next '$'."""
        following = self.state_7 if token == '$' else self.state_3c
        return following, obj

    @watchable
    def state_4(self, token, obj):
        """Inside a first-level block: expects '}', a key, a separator
        or a comment."""
        if token == '}':
            return self.state_0, obj
        if self.is_key(token):
            if token == 'ATTRIBUTE':
                obj.new_item(Attribute())
            else:
                obj.new_item(SimpleAttr(token))
            return self.state_5, obj
        if token == '$':
            return self.state_4, obj
        if token.startswith('#'):
            return self.state_3b, obj
        # any other token: no transition is defined
        return None

    @watchable
    def state_5(self, token, obj):
        """First-level block, after a key: expects the value (or the
        name in case of a nested Attribute)."""
        if not self.is_value(token):
            # no transition is defined for '{', '}' and '#'
            return None
        if isinstance(obj.last_item, Attribute):
            obj.last_item.name = token
        else:
            obj.last_item.value = token
        return self.state_6, obj

    @watchable
    def state_6(self, token, obj):
        """First-level block, after a value: the entry ends or a nested
        block opens."""
        if token == '$':
            return self.state_4, obj
        if token == '{':
            return self.state_7, obj
        # any other token: no transition is defined
        return None

    @watchable
    def state_7(self, token, obj):
        """Inside a second-level block: expects a key, a separator, '}'
        or a comment."""
        if self.is_key(token):
            obj.last_item.new_item(SimpleAttr(token))
            return self.state_8, obj
        if token == '$':
            return self.state_7, obj
        if token == '}':
            return self.state_4, obj
        if token.startswith('#'):
            return self.state_3c, obj
        # any other token: no transition is defined
        return None

    @watchable
    def state_8(self, token, obj):
        """Second-level block, after a key: expects the value."""
        if not self.is_value(token):
            # no transition is defined for '{', '}' and '#'
            return None
        obj.last_item.last_item.value = token
        return self.state_9, obj

    @watchable
    def state_9(self, token, obj):
        """Second-level block, after a value: expects the separator."""
        if token == '$':
            return self.state_7, obj
        # any other token: no transition is defined
        return None

    def __call__(self):
        """Run the machine over all tokens and return the RegistryConf."""
        handler = self.state_0
        current = None
        for token in self._tokens:
            if handler is None:
                break
            # NOTE(review): states without a defined transition return a
            # bare None, which cannot be unpacked here - unless the
            # `watchable` decorator (not visible in this file section)
            # adjusts the return value. Worth confirming.
            handler, current = handler(token, current)
        # the last item is still pending as nothing followed to flush it
        if current:
            self._items.add_item(current)
        return self._items