Exemple #1
0
 def rebuild_registry(self, registry_path, variant, proc_aligned=False):
     """
     Regenerate the registry file at registry_path from a RegistryConf
     loaded via self._backend.

     The corpus identifier is the base name of registry_path. Missing
     parent directories are created.

     arguments:
     registry_path -- path of the registry file to be (over)written
     variant -- passed as-is to RegistryConf
     proc_aligned -- if True then a registry file is rebuilt also for each
                     item of rc.aligned (placed in the same directory);
                     these nested calls do not process aligned items again
     """
     logging.getLogger(__name__).info('Rebuilding registry %s', registry_path)
     rc = RegistryConf(corpus_id=os.path.basename(registry_path),
                       variant=variant,
                       backend=self._backend)
     rc.load()
     reg_dir = os.path.dirname(registry_path)
     # dirname is '' for a bare file name and os.makedirs('') would raise
     if reg_dir and not os.path.exists(reg_dir):
         os.makedirs(reg_dir)
     serializer = RegModelSerializer(add_heading=True)
     # the serialized text is encoded explicitly, i.e. we write bytes and
     # the file must be opened in binary mode (text mode fails on Python 3)
     with open(registry_path, 'wb') as fw:
         fw.write(serializer.serialize(rc).encode(rc.encoding))
     if proc_aligned:
         for aligned in rc.aligned:
             # proc_aligned is left at its default (False) here
             self.rebuild_registry(os.path.join(reg_dir, aligned), variant)
Exemple #2
0
 def rebuild_registry(self, registry_path, variant, proc_aligned=False):
     """
     Write a fresh registry file to registry_path using a RegistryConf
     instance loaded through self._backend.

     The base name of registry_path is used as the corpus identifier.
     Parent directories are created if they do not exist yet.

     arguments:
     registry_path -- target registry file (overwritten if present)
     variant -- handed over to RegistryConf unchanged
     proc_aligned -- if True then each name in rc.aligned gets its
                     registry rebuilt in the same directory too; the
                     nested calls run with proc_aligned=False
     """
     logging.getLogger(__name__).info('Rebuilding registry %s', registry_path)
     rc = RegistryConf(corpus_id=os.path.basename(registry_path),
                       variant=variant,
                       backend=self._backend)
     rc.load()
     target_dir = os.path.dirname(registry_path)
     # an empty dirname (bare file name) must not reach os.makedirs()
     if target_dir and not os.path.exists(target_dir):
         os.makedirs(target_dir)
     serializer = RegModelSerializer(add_heading=True)
     # encode() produces bytes, so a binary-mode file is required
     # (a text-mode file rejects bytes on Python 3)
     with open(registry_path, 'wb') as fw:
         fw.write(serializer.serialize(rc).encode(rc.encoding))
     if proc_aligned:
         for aligned in rc.aligned:
             self.rebuild_registry(
                 os.path.join(target_dir, aligned),
                 variant)
Exemple #3
0
def load_registry(corpus_id, variant, backend):
    """
    Load a RegistryConf for the given corpus_id, variant and backend
    and return the output of RegModelSerializer.serialize() for it.
    """
    registry = RegistryConf(
        corpus_id=corpus_id, variant=variant, backend=backend)
    registry.load()
    serializer = RegModelSerializer()
    return serializer.serialize(registry)
Exemple #4
0
 def __init__(self, corpus_id, variant, tokens, backend):
     """
     arguments:
     corpus_id -- passed to RegistryConf
     variant -- passed to RegistryConf
     tokens -- stored as self._tokens
     backend -- passed to RegistryConf
     """
     # counter starts at zero
     self._posattr_idx = 0
     self._tokens = tokens
     # the (initially not loaded) configuration object
     self._items = RegistryConf(corpus_id, variant, backend)
Exemple #5
0
class Parser(object):
    """
    A state machine which turns a sequence of registry tokens into
    items of a RegistryConf instance.

    Each state_N method accepts the current token and the object being
    built and returns a 2-tuple (next state method, object). A token a
    state cannot handle raises RegistrySyntaxError.

    NOTE(review): the '$' token appears to be a line/record terminator and
    tokens starting with '#' appear to open a comment lasting until the next
    '$' - confirm against the tokenizer which produces the input.
    """

    def __init__(self, corpus_id, variant, tokens, backend):
        """
        arguments:
        corpus_id -- passed to RegistryConf
        variant -- passed to RegistryConf
        tokens -- a sequence of string tokens to be processed
        backend -- passed to RegistryConf
        """
        self._tokens = tokens
        self._items = RegistryConf(corpus_id, variant, backend)
        # position assigned to the next top-level ATTRIBUTE item
        self._posattr_idx = 0

    @staticmethod
    def is_key(s):
        """
        Return a truthy value (a regexp match object) if the token starts
        with at least one upper-case ASCII letter, otherwise None.
        """
        return re.match(r'[A-Z]+', s)

    @staticmethod
    def is_value(s):
        """
        Return True for any token other than the bare '{', '}' and '#'.
        """
        return s not in ('{', '}', '#')

    @staticmethod
    def _unexpected(state_id, token):
        """
        Create (not raise) a syntax error for a token the given state
        cannot process.
        """
        return RegistrySyntaxError(
            u'in state {0} cannot process: {1}'.format(state_id, token))

    @watchable
    def state_0(self, token, obj):
        """
        Top level: expects a key, a comment or '$'. A new key
        stores the previously built object (if any) and starts a new one.
        """
        if token == '$':
            return self.state_0, obj
        if token.startswith('#'):
            return self.state_3, obj
        if not self.is_key(token):
            raise self._unexpected(0, token)
        if obj:
            self._items.add_item(obj)
        if token == 'ATTRIBUTE':
            attr = PosAttribute(position=self._posattr_idx, name=token)
            self._posattr_idx += 1
            return self.state_1, attr
        if token == 'STRUCTURE':
            return self.state_1, Struct(token)
        return self.state_1, SimpleAttr(token)

    @watchable
    def state_1(self, token, obj):
        """
        Top level: expects the value following a key. For SimpleAttr it is
        stored as 'value', for other objects as 'name'.
        """
        if not self.is_value(token):
            raise self._unexpected(1, token)
        if isinstance(obj, SimpleAttr):
            obj.value = token
        else:
            obj.name = token
        return self.state_2, obj

    @watchable
    def state_2(self, token, obj):
        """
        Top level: after a key-value pair expects either '$' or
        an opening '{' of a nested block.
        """
        if token == '$':
            return self.state_0, obj
        if token == '{':
            return self.state_4, obj
        raise self._unexpected(2, token)

    @watchable
    def state_3(self, token, obj):
        """
        Top-level comment: skips tokens until '$', then returns to state 0.
        """
        if token == '$':
            return self.state_0, obj
        return self.state_3, obj

    def state_3b(self, token, obj):
        """
        Comment inside a first-level block: skips tokens until '$',
        then returns to state 4.
        """
        if token == '$':
            return self.state_4, obj
        return self.state_3b, obj

    def state_3c(self, token, obj):
        """
        Comment inside a second-level block: skips tokens until '$',
        then returns to state 7.
        """
        if token == '$':
            return self.state_7, obj
        return self.state_3c, obj

    @watchable
    def state_4(self, token, obj):
        """
        First-level block: expects a closing '}', a key (which adds a new
        child item to obj), '$' or a comment.
        """
        if token == '}':
            return self.state_0, obj
        if self.is_key(token):
            if token == 'ATTRIBUTE':
                obj.new_item(Attribute())
            else:
                obj.new_item(SimpleAttr(token))
            return self.state_5, obj
        if token == '$':
            return self.state_4, obj
        if token.startswith('#'):
            return self.state_3b, obj
        raise self._unexpected(4, token)

    @watchable
    def state_5(self, token, obj):
        """
        First-level block: expects the value of the most recently added
        child item ('name' for Attribute, 'value' otherwise).
        """
        if not self.is_value(token):
            raise self._unexpected(5, token)
        if isinstance(obj.last_item, Attribute):
            obj.last_item.name = token
        else:
            obj.last_item.value = token
        return self.state_6, obj

    @watchable
    def state_6(self, token, obj):
        """
        First-level block: after a key-value pair expects '$' or
        an opening '{' of a second-level block.
        """
        if token == '$':
            return self.state_4, obj
        if token == '{':
            return self.state_7, obj
        raise self._unexpected(6, token)

    @watchable
    def state_7(self, token, obj):
        """
        Second-level block: expects a key (added as SimpleAttr to the last
        child of obj), '$', a closing '}' or a comment.
        """
        if self.is_key(token):
            obj.last_item.new_item(SimpleAttr(token))
            return self.state_8, obj
        if token == '$':
            return self.state_7, obj
        if token == '}':
            return self.state_4, obj
        if token.startswith('#'):
            return self.state_3c, obj
        raise self._unexpected(7, token)

    @watchable
    def state_8(self, token, obj):
        """
        Second-level block: expects the value of the most recently added
        nested item.
        """
        if not self.is_value(token):
            raise self._unexpected(8, token)
        obj.last_item.last_item.value = token
        return self.state_9, obj

    @watchable
    def state_9(self, token, obj):
        """
        Second-level block: after a key-value pair only '$' is accepted.
        """
        if token == '$':
            return self.state_7, obj
        raise self._unexpected(9, token)

    def __call__(self):
        """
        Run the state machine over all the tokens and return
        the RegistryConf instance filled with parsed items.
        """
        fn = self.state_0
        obj = None
        for token in self._tokens:
            # kept for backward compatibility in case a (decorated)
            # state ever yields no follow-up state
            if fn is None:
                break
            fn, obj = fn(token, obj)
        # the last built object has not been stored by state_0 yet
        if obj:
            self._items.add_item(obj)
        return self._items
Exemple #6
0
def load_registry(corpus_id, variant, backend):
    """
    Create a RegistryConf from the passed arguments, load it and return
    what RegModelSerializer.serialize() produces for it.
    """
    reg_conf = RegistryConf(
        corpus_id=corpus_id,
        variant=variant,
        backend=backend)
    reg_conf.load()
    serializer = RegModelSerializer()
    return serializer.serialize(reg_conf)