Esempio n. 1
0
    def content(self, value):
        """
        Store ``value`` as the content and create the two associated views.

        A string gets two ``IOView`` objects in ``"list"`` mode. A ``Path``
        gets two ``IOView`` objects in ``"pickle"`` mode whose paths are
        derived from ``value`` by extending its suffix. Any other type
        raises ``TypeError`` and leaves the instance untouched.
        """
        # Guard clause: refuse anything that is neither a string nor a path.
        if not isinstance(value, (str, Path)):
            raise TypeError("invalid type for the attribut ``content``.")

        self._content = value

        # Raw text: both views use the "list" mode.
        if isinstance(value, str):
            self._partial_view = IOView("list")
            self.view = IOView("list")
            return

        # A path: both views use the "pickle" mode, their paths sit next to
        # the source file and keep its original suffix as a prefix.
        suffix = value.suffix

        partial_path = value.with_suffix(
            "{0}.orpyste.partial.ast".format(suffix)
        )
        self._partial_view = IOView(mode = "pickle", path = partial_path)

        final_path = value.with_suffix("{0}.orpyste.ast".format(suffix))
        self.view = IOView(mode = "pickle", path = final_path)
Esempio n. 2
0
    def content(self, value):
        """
        Remember ``value`` and build the partial and final views for it.

        ``str`` values use ``IOView`` objects in ``"list"`` mode, ``Path``
        values use ``IOView`` objects in ``"pickle"`` mode stored beside the
        given path. Every other type is rejected with ``TypeError``.
        """
        is_text = isinstance(value, str)

        if not is_text and not isinstance(value, Path):
            raise TypeError("invalid type for the attribut ``content``.")

        self._content = value

        if is_text:
            self._partial_view = IOView("list")
            self.view = IOView("list")

        else:
            # The original suffix is kept inside the new, longer suffix.
            original_suffix = value.suffix

            self._partial_view = IOView(
                mode="pickle",
                path=value.with_suffix(
                    "{0}.orpyste.partial.ast".format(original_suffix)),
            )

            self.view = IOView(
                mode="pickle",
                path=value.with_suffix(
                    "{0}.orpyste.ast".format(original_suffix)),
            )
Esempio n. 3
0
class AST():
    """
prototype::
    see = Mode, CtxtInfos, ContentInfos

    arg-attr = pathlib.Path, str: content ;
               ``content`` can be an instance of the class ``pathlib.Path``,
               that is a file given using its path, or ``content`` can be a
               string with all the content to be analyzed (see the attribute
               ``view``)
    arg-attr = str, dict: mode ;
               an ¨orpyste mode that can use different kinds of syntax (see the
               documentation of the class ``Mode``)
    arg-attr = str: encoding = "utf-8" ;
               the encoding used to read the file when ``content`` is a
               ``pathlib.Path``


    attr = IOView: view ;
           this attribute contains a verbose and easy to read version of the
           abstract syntax tree, stored by an ``IOView`` in ``"pickle"`` mode
           if the argument attribute ``content`` is a ``pathlib.Path``, or by
           an ``IOView`` in ``"list"`` mode if the argument attribute
           ``content`` is a string

    method = build ;
             you have to call this method each time you must build, or rebuild,
             the abstract syntax tree


This class can build an Abstract Syntax Tree (AST) view of a merely ¨orpyste
file. We have written "merely" because here we allow some semantically illegal
¨peuf syntaxes. This will be the job of ``parse.Walk`` to manage this kind of
errors among some other ones.


Here is a very simple example showing how to build the AST view and how to walk
in this view.

pyterm:
    >>> from pprint import pprint # For pretty printings of dictionaries.
    >>> from orpyste.parse.ast import AST
    >>> content = '''
    ... ==========================
    ... Section 1
    ... must be on a single line !
    ... ==========================
    ...
    ... test::
    ...     Missing a key-val first !
    ...     a = 3
    ... '''.strip()
    >>> mode = 'keyval::='
    >>> ast = AST(content = content, mode = mode)
    >>> ast.build()
    >>> for metadata in ast:
    ...     pprint(metadata)
    {'kind': 'section', 'nbline': 1, 'openclose': 'open'}
    {'content': 'Section 1', 'kind': ':verbatim:', 'nbline': 2}
    {'content': 'must be on a single line !', 'kind': ':verbatim:', 'nbline': 3}
    {'kind': 'section', 'nbline': 4, 'openclose': 'close'}
    {'kind': ':emptyline:', 'nbline': 5}
    {'groups_found': {'name': 'test'},
     'kind': 'block',
     'mode': 'keyval',
     'nbline': 6,
     'openclose': 'open'}
    {'content': {'value_in_line': 'Missing a key-val first !'},
     'kind': ':content:',
     'nbline': 7}
    {'content': {'key': 'a', 'sep': '=', 'value': '3'},
     'kind': ':content:',
     'nbline': 8}
    {'kind': 'block', 'nbline': 8, 'openclose': 'close'}


warning::
    This class does not do any semantic analysis as we can see in the example
    above where the title of the section is on two lines instead of a single
    one, and the content of the block orpyste::``test`` starts with an inline
    value instead of a key-value one. This will be the job of ``parse.Walk`` to
    manage semantic problems.
    """
    # CONFIGURATIONS OF THE CONTEXTS [human form]
    SPACES_PATTERN = "[ \\t]*"
    LINE_PATTERN = "^.*$"

    KEY_GRP_PATTERN = "(?P<key>.*?)"
    VALUE_GRP_PATTERN = "(?P<value>.*)"

    # The CTXTS_CONFIGS are sorted from the first to be tested to the last one.

    # Integer markers used below as special values and as a configuration key.
    CLOSED_BY_INDENT_ID, CLOSED_AT_END_ID, VERBATIM_ID = range(3)

    # If the key ``SUBCTXTS`` is not used, this means "use all possible
    # contexts inside me". The name of a context cannot look like ``:onename:``
    # with colons around it.
    SUBCTXTS = "subcontexts"
    INFINITY_LEVEL = "inf-level"

    CTXTS_CONFIGS = OrderedDict()

    # The missing ``CLOSE`` indicates an auto-close context.
    #
    # << Warning ! >> The group name ``content`` indicates to put matching in a
    # content line like context.
    #
    # The patterns with backslashes are raw strings so that ``\*`` reaches the
    # regex engine untouched without being an invalid string escape sequence.
    CTXTS_CONFIGS[MAGIC_COMMENT] = {
        OPEN: "^////$",
        INFINITY_LEVEL: True,  # This allows to force the level.
        SUBCTXTS: VERBATIM_ID  # This indicates no subcontext.
    }

    CTXTS_CONFIGS[COMMENT_SINGLELINE] = {
        OPEN: "^//(?P<content>.*)$",
        INFINITY_LEVEL: True,
        SUBCTXTS: VERBATIM_ID
    }

    CTXTS_CONFIGS[COMMENT_MULTILINES_SINGLELINE] = {
        OPEN: r"^/\*(?P<content>.*)\*/[ \t]*$",
        INFINITY_LEVEL: True,
        SUBCTXTS: VERBATIM_ID
    }

    CTXTS_CONFIGS[COMMENT_MULTILINES] = {
        OPEN: r"^/\*(?P<content>.*)$",
        CLOSE: r"^(?P<content>.*)\*/[ \t]*$",
        SUBCTXTS: VERBATIM_ID,
        INFINITY_LEVEL: True,
        CLOSED_AT_END_ID: True
    }

    # Sections.
    CTXTS_CONFIGS[SECTION_TAG] = {
        OPEN: "^={2,}$",
        CLOSE: "^={2,}$",
        SUBCTXTS: VERBATIM_ID,
        CLOSED_AT_END_ID: False
    }

    # ``CLOSE: CLOSED_BY_INDENT_ID`` indicates a context using indentation for
    # its content.
    #
    # We can use a tuple to indicate several patterns, and we can also use the
    # special prefix ``not::`` to negate a regex (doing this in pure regex can
    # be very messy).
    LEGAL_BLOCK_NAME_RE = re.compile("^{0}$".format(LEGAL_BLOCK_NAME))

    CTXTS_CONFIGS[BLOCK_TAG] = {
        OPEN: ("^{0}(?P<name>{1})::$".format(SPACES_PATTERN, LEGAL_BLOCK_NAME),
               "not::^{0}{1}\\\\::$".format(SPACES_PATTERN, LEGAL_BLOCK_NAME)),
        CLOSE:
        CLOSED_BY_INDENT_ID,
        CLOSED_AT_END_ID:
        True
    }

    def __init__(self, content, mode, encoding="utf-8"):
        # User's arguments.
        self.content = content
        self.mode = mode
        self.encoding = encoding

        # Let's build our contexts' rules.
        self.build_ctxts_rules()
        self.build_contents_rules()

# -- SPECIAL SETTERS -- #

    @property
    def content(self):
        return self._content

    @content.setter
    def content(self, value):
        if isinstance(value, str):
            self._content = value
            self._partial_view = IOView("list")
            self.view = IOView("list")

        elif isinstance(value, Path):
            self._content = value

            self._partial_view = IOView(mode="pickle",
                                        path=value.with_suffix(
                                            "{0}.orpyste.partial.ast".format(
                                                value.suffix)))

            self.view = IOView(mode="pickle",
                               path=value.with_suffix("{0}.orpyste.ast".format(
                                   value.suffix)))

        else:
            raise TypeError("invalid type for the attribut ``content``.")

    @property
    def mode(self):
        return self._mode

    @mode.setter
    def mode(self, value):
        self._mode = Mode(value)

# -- INTERNAL CONTEXTS' RULES -- #

    def build_ctxts_rules(self):
        """
prototype::
    action = this method builds ¨python none human lists and dictionaries used
             to build an intermediate abstract syntax tree of the contexts
             which are either opening or closing blocks or comments, or empty
             lines, or lines of contents (you can breath now).
             This will be the job of ``self.build_contents_rules`` to take care
             of lines of contents.
        """
        # MATCHERS FOR THE CONTEXTS [the E.T. experience] ;-)
        #
        # We build ¨python none human list for research with the following constraints.
        #
        #     1) We stop as soon as we find a winning matching.
        #     2) If a an opening context has been found just before, we have to test
        #        first its associated closing context which can be either a pattern or #        an indentation closing.
        #        Then we look for "all" the other opening and then closing contexts.
        #     3) If no open context has been found just before, we test first "all" the
        #        open contexts and then "all" the close ones.
        #     4) We have to take care of subcontexts.
        #     5) We store the regex objects in a list (think about the subcontexts).
        #
        # << Warning ! >> We add a matcher for empty line at the very beginning because
        # we want to keep them but we have also have to skip them when searching for
        # contexts. So easy... :-(
        self.MATCHERS = [{
            True:  # Boolean wanted.
            [re.compile("^$")]  # List of regexes to test.
        }]

        self.CLOSING_ID_FROM_OPENING = {}

        self.CTXTINFOS_EMPTYLINE = CtxtInfos(
            kind=EMPTYLINE_TAG,
            id_matcher=0  # See ``self.MATCHERS``.
        )

        self.CTXTINFOS_CONTENT = CtxtInfos(kind=SPE_CONTENT_TAG)

        self.CTXTS_MATCHERS = [self.CTXTINFOS_EMPTYLINE]

        self.CTXTS_KINDS_SUBCTXTS = {}

        self.INFINITY = float('inf')
        self.CTXTS_KINDS_WITH_INF_LEVELS = set()

        self.CTXTS_KINDS_CLOSED_AT_END = set()

        id_matcher = len(self.MATCHERS) - 1
        name2id = {}

        for openclose in [OPEN, CLOSE]:
            for kind, configs in self.CTXTS_CONFIGS.items():
                if openclose in configs:
                    spec = configs[openclose]

                    # We do not keep the special keyword CLOSED_BY_INDENT_ID.
                    if openclose == CLOSE \
                    and spec == self.CLOSED_BY_INDENT_ID:
                        continue

# We manage other cases.
                    if isinstance(spec, (str, int)):
                        spec = [spec]

                    matcher = {}
                    regex_grps = []

                    # A regex pattern.
                    for s in spec:
                        if s.startswith("not::"):
                            boolwanted = False
                            s = s[5:]

                        else:
                            boolwanted = True

                        pattern = re.compile(s)

                        # Do we have regex groups ?
                        regex_grps += [x for x in pattern.groupindex]

                        # We add a new regex.
                        if boolwanted in matcher:
                            matcher[boolwanted].append(pattern)

                        else:
                            matcher[boolwanted] = [pattern]

                    id_matcher += 1
                    self.MATCHERS.append(matcher)

                    _openclose = openclose

                    if CLOSE in configs:
                        if configs[CLOSE] == self.CLOSED_BY_INDENT_ID:
                            indented = True

                        else:
                            indented = False

                            self.CLOSING_ID_FROM_OPENING[kind] = id_matcher

                    else:
                        _openclose = AUTOCLOSE
                        indented = False

                    if configs.get(self.CLOSED_AT_END_ID, False):
                        self.CTXTS_KINDS_CLOSED_AT_END.add(kind)

                    verbatim = (self.SUBCTXTS in configs
                                and configs[self.SUBCTXTS] == self.VERBATIM_ID)

                    self.CTXTS_MATCHERS.append(
                        CtxtInfos(kind=kind,
                                  openclose=_openclose,
                                  indented=indented,
                                  id_matcher=id_matcher,
                                  regex_grps=regex_grps,
                                  verbatim=verbatim))

                    name2id[(openclose, kind)] = id_matcher

# SUBCONTEXTS AND CONTEXT'S LEVEL
        for kind, configs in self.CTXTS_CONFIGS.items():
            if self.INFINITY_LEVEL in configs:
                self.CTXTS_KINDS_WITH_INF_LEVELS.add(kind)

            if self.SUBCTXTS in configs:
                # Empty lines can appear anywhere !
                subctxts = [(self.CTXTINFOS_EMPTYLINE.openclose,
                             self.CTXTINFOS_EMPTYLINE.kind)]

                if configs[self.SUBCTXTS] == self.VERBATIM_ID:
                    if (CLOSE, kind) in name2id:
                        subctxts.append((CLOSE, kind))

                else:
                    for kind in configs[self.SUBCTXTS]:
                        for openclose in [OPEN, CLOSE]:
                            if (openclose, kind) in name2id:
                                subctxts.append((openclose, kind))

                self.CTXTS_KINDS_SUBCTXTS[kind] = subctxts

# -- INTERNAL CONTENTS' RULES -- #

    def build_contents_rules(self):
        """
prototype::
    action = this method builds ¨python none human lists and dictionaries used
             to build from the intermediate abstract syntax tree of the
             contexts the final abstract syntax tree where the lines of contents
             have been analyzed.
        """
        # Configurations of the patterns for datas in contexts
        self.CONTENTS_MATCHERS = {}

        id_matcher = len(self.MATCHERS)

        # For the "verbatim" mode.
        self.MATCHERS.append({True: [re.compile("^(?P<value_in_line>.*)$")]})
        id_verbatim = id_matcher

        # Let's work !
        for ctxt, configs in self.mode.items():
            # "keyval" or "multikeyval" modes.
            if configs[MODE_TAG] in [KEYVAL, MULTIKEYVAL]:
                # We must take care of separators with several characters, and we also have to
                # escape special characters.
                seps = []

                for onesep in configs[SEPS_TAG]:
                    if len(onesep) != 1:
                        onesep = "({0})".format(re.escape(onesep))

                    else:
                        onesep = re.escape(onesep)

                    seps.append(onesep)

                pattern = re.compile(
                    "{spaces}{key}{spaces}(?P<sep>{seps}){spaces}{value}".
                    format(spaces=self.SPACES_PATTERN,
                           key=self.KEY_GRP_PATTERN,
                           value=self.VALUE_GRP_PATTERN,
                           seps="|".join(seps)))

                self.MATCHERS.append({True: [pattern]})
                id_matcher += 1

                # Do we have regex groups ?
                regex_grps = [x for x in pattern.groupindex]

                self.CONTENTS_MATCHERS[ctxt] = ContentInfos(
                    mode=configs[MODE_TAG],
                    id_matcher=[id_matcher, id_verbatim],
                    regex_grps=regex_grps,
                )

# "verbatim" and "container" modes.
            elif configs[MODE_TAG] in [VERBATIM, CONTAINER]:
                self.CONTENTS_MATCHERS[ctxt] = ContentInfos(
                    mode=configs[MODE_TAG], id_matcher=id_verbatim)

# Mode not implemented.
            else:
                raise ValueError(
                    "BUG to report : mode ``{0}`` not implemented".format(
                        configs[MODE_TAG]))

# -- WALKING IN THE CONTENT -- #

    def nextline(self):
        """
property::
    yield = str ;
            each line of ``self.content``.
        """
        if isinstance(self._content, str):
            for line in StringIO(self._content):
                self._nbline += 1
                yield line.rstrip()

        else:
            with self._content.open(mode="r",
                                    encoding=self.encoding) as peuffile:
                for line in peuffile:
                    self._nbline += 1
                    yield line.rstrip()

# -- INDENTATION -- #

    def manage_indent(self):
        """
property::
    action = the level of indention is calculated and the leading indentation
             of ``self._line`` is removed (one tabulation is exactly equal to
             four spaces).
        """
        if self._line \
        and self._level != self.INFINITY:
            self._level = 0

            for char in self._line:
                if char == ' ':
                    self._level += 1

                elif char == '\t':
                    self._level += 4

                else:
                    break

            self._oldline = self._line

            self._line = " " * (self._level % 4) + self._oldline.lstrip()
            self._level //= 4

# -- REGEXES -- #

    def match(self, text, infos):
        """
property::
    arg = str: text ;
          this string is a text where we look for some metadatas (a context or
          a data content)
    arg = CtxtInfos, ContentInfos: infos ;
          this indicates which matcher must be used to test a matching on the
          argument ``text``

    return = bool ;
             ``True`` or ``False`` whether something matches or not
        """
        for oneid in infos.id_matcher:
            match_found = True
            self._groups_found = {}

            # Looking for the first winning matching.
            for boolwanted, thematchers \
            in self.MATCHERS[oneid].items():
                for onematcher in thematchers:
                    search = onematcher.search(text)

                    if bool(search) != boolwanted:
                        match_found = False
                        break

# Do we have groups to stored ?
                    elif search:
                        self._groups_found.update(search.groupdict())

                if match_found is False:
                    break

            if match_found is True:
                break

# We have a winning matching or not.
        return match_found

# -- BUILD THE AST -- #

    def build(self):
        """
prototype::
    action = this method calls all the methods needed so as to build the
             abstract syntax tree.
        """
        # Internal attributs
        self._nbline = 0
        self._line = None

        self._verbatim = False

        self._level = 0
        self._levels_stack = []

        self._ctxts_opened_stack = []
        self._ctxt_sbctxts_stack = []

        # Intermediate AST only for contexts.
        with self._partial_view:
            for self._line in self.nextline():
                self.search_ctxts()

            self.close_ctxt_at_end()

# Final AST with datas in contents.
        with self.view:
            self.search_contents()

# The partial view is not usefull in the disk.
        self._partial_view.remove()

# -- LOOKING FOR CONTEXTS -- #

    def search_ctxts(self):
        """
prototype::
    action = this method looks, in the current line ``self._line``, for
             contexts which can be either opening or closing blocks or
             comments, or empty lines, or lines of contents. The stacks of
             opened contexts, of subcontexts and of levels are updated, and
             the context found is stored via ``self.store_one_ctxt``.
             An ``ASTError`` is raised if a closing context does not close
             the last context opened.
        """
        ctxtfound = False
        mustclose_otherctxts = False

        # Do we close the last context opened ?
        if self._ctxts_opened_stack:
            closeby_id = self.CLOSING_ID_FROM_OPENING.get(
                self._ctxts_opened_stack[-1].kind, None)

            # The id ``0`` is the one of the matcher for empty lines built in
            # ``self.build_ctxts_rules``, so a real closing id is truthy.
            if closeby_id:
                ctxtinfos = self.CTXTS_MATCHERS[closeby_id]

                if self.match(self._line, ctxtinfos):
                    ctxtfound = True

        # Other contexts must be searched.
        if not ctxtfound:
            for ctxtinfos in self.CTXTS_MATCHERS:
                # Not a subcontext allowed by the last restricting context ?
                if self._ctxt_sbctxts_stack \
                and (
                    ctxtinfos.openclose,
                    ctxtinfos.kind
                ) not in self._ctxt_sbctxts_stack[-1]:
                    continue

                # A new context found. Only an opening one found here can
                # close the previous indented contexts.
                if self.match(self._line, ctxtinfos):
                    ctxtfound = True
                    mustclose_otherctxts = bool(ctxtinfos.openclose == OPEN)
                    break

        # Now that a context has been found, or not, we can manage indentation.
        self.manage_indent()

        # Invisible new context: a line of content (be careful of indentation
        # closing).
        if not ctxtfound:
            ctxtinfos = self.CTXTINFOS_CONTENT
            mustclose_otherctxts = True

        # Level can be forced to infinity.
        if ctxtinfos.kind in self.CTXTS_KINDS_WITH_INF_LEVELS \
        and ctxtinfos.openclose != AUTOCLOSE:
            self._level = self.INFINITY

        # Close previous contexts.
        if mustclose_otherctxts:
            self.close_indented_ctxts(ctxtinfos)

        # Add an opening context in the stack.
        if ctxtinfos.openclose == OPEN:
            self._ctxts_opened_stack.append(ctxtinfos)

            # Do we have to use subcontexts ?
            if ctxtinfos.kind in self.CTXTS_KINDS_SUBCTXTS:
                self._ctxt_sbctxts_stack.append(
                    self.CTXTS_KINDS_SUBCTXTS[ctxtinfos.kind])

        # A closing context: it must close the last context opened.
        elif ctxtinfos.openclose == CLOSE:
            if not self._ctxts_opened_stack:
                raise ASTError("wrong closing context: see line #{0}".format(
                    self._nbline))

            lastctxt = self._ctxts_opened_stack.pop(-1)

            if lastctxt.kind != ctxtinfos.kind:
                raise ASTError(
                    "wrong closing context: " \
                    + "see line no.{0} and context \"{1}\"".format(
                        self._nbline, ctxtinfos.kind
                    )
                )

            # The three stacks are unwound together and the level is reset.
            self._ctxt_sbctxts_stack.pop(-1)
            self._levels_stack.pop(-1)
            self._level = 0

        # We can store the new context (closings of old indented contexts, if
        # any, have been stored by ``self.close_indented_ctxts``).
        # # --- UGLY DEBUG --- #
        # print("AST -->", ctxtinfos)
        self.store_one_ctxt(ctxtinfos)

    def must_close_indented_ctxt(self):
        """
prototype::
    return = bool ;
             ``True`` or ``False`` whether we have to close or not the actual
             context due to the indentation
        """
        return self._levels_stack and self._level <= self._levels_stack[-1]

    def close_indented_ctxts(self, ctxtinfos):
        """
prototype::
    action = this method closes all contexts that use indentation for their
             content.
        """
        # Sections close all blocks !
        if ctxtinfos.kind == SECTION_TAG:
            if ctxtinfos.openclose == OPEN:
                while self._ctxts_opened_stack:
                    self._levels_stack.pop(-1)

                    lastctxt = self._ctxts_opened_stack.pop(-1)

                    if not lastctxt.indented:
                        break

                    self.store_one_ctxt(CtxtInfos(kind=lastctxt.kind,
                                                  openclose=CLOSE),
                                        not_add_groups_alone=False)

            self._ctxts_opened_stack = self._levels_stack = []

# Not a section. What can't close an indented contexts ?
#     * Verbatim contents
#     * Empty lines
#     * Autoclosed context
#     * Comments on a single line
        elif self._ctxts_opened_stack \
        and not self._ctxts_opened_stack[-1].verbatim \
        and ctxtinfos != self.CTXTINFOS_EMPTYLINE \
        and ctxtinfos.openclose != AUTOCLOSE \
        and ctxtinfos.kind not in COMMENTS_ON_JUST_ONELINE:
            if self._levels_stack \
            and self._levels_stack[-1] != self.INFINITY:
                while self.must_close_indented_ctxt():
                    self._levels_stack.pop(-1)

                    lastctxt = self._ctxts_opened_stack.pop(-1)

                    self.store_one_ctxt(CtxtInfos(kind=lastctxt.kind,
                                                  openclose=CLOSE),
                                        not_add_groups_alone=False)

# We update the stack of levels.
        if ctxtinfos.openclose == OPEN:
            if self._levels_stack \
            and self._level != self._levels_stack[-1]:
                self._levels_stack.append(self._level)

            else:
                self._levels_stack = [self._level]

# Autoclose context with infinite level do not change the levels !
        elif ctxtinfos.openclose == AUTOCLOSE \
        and self._levels_stack \
        and self._level == self.INFINITY:
            self._level = self._levels_stack[-1]

# Close context with infinite level need to clean the stack of levels !
        elif ctxtinfos.openclose == CLOSE \
        and self._levels_stack \
        and self._level == self.INFINITY:
            self._levels_stack.pop(-1)

            if self._levels_stack:
                self._level = self._levels_stack[-1]

            else:
                self._level = 0

# Ugly patch !
        if self._level == self.INFINITY:
            self._level = 0

    def close_ctxt_at_end(self):
        """
prototype::
    action = this method closes all contexts than can be closed automatically
             at the very end of the ¨orpyste file
        """
        while self._ctxts_opened_stack:
            lastctxt_kind = self._ctxts_opened_stack.pop(-1).kind

            if lastctxt_kind not in self.CTXTS_KINDS_CLOSED_AT_END:
                raise ASTError(
                    "unclosed context: " \
                    + "see line no.{0} and context \"{1}\"".format(
                        self._nbline, lastctxt_kind
                    )
                )

            self.store_one_ctxt(CtxtInfos(kind=lastctxt_kind, openclose=CLOSE))

# -- LOOKING FOR DATAS IN CONTENTS -- #

    def search_contents(self):
        """
prototype::
    action = this method looks for datas in contents regarding the mode of the
             blocks.
        """
        _defaultmatcher = self.CONTENTS_MATCHERS.get(DEFAULT, None)
        self._matcherstack = []
        self._nb_emptylines = 0

        for onemeta in self.next_partial_meta():
            # --- IMPORTANT : UGLY DEBUG --- #
            # print("AST >>>", onemeta, "\n" + " "*7, self._levels_stack);continue

            # The big messe of empty lines in verbatim content.
            # One new block.
            if onemeta[KIND_TAG] == BLOCK_TAG:
                if onemeta[OPENCLOSE] == OPEN:
                    # Preceding block must be a container !
                    if not self.last_block_is_container():
                        raise ASTError(
                            "last block not a container, see line nb.{0}" \
                                .format(onemeta[NBLINE_TAG])
                        )

                    matcher = self.CONTENTS_MATCHERS.get(
                        onemeta[GRPS_FOUND_TAG][NAME_TAG], _defaultmatcher)

                    if not matcher:
                        raise ASTError(
                            "last block << {0} >> is illegal, see line nb.{1}" \
                                .format(
                                    onemeta[GRPS_FOUND_TAG][NAME_TAG],
                                    onemeta[NBLINE_TAG]
                                )
                        )

# We must know the mode used by this block.
                    onemeta[MODE_TAG] = matcher.mode

                    self._matcherstack.append(matcher)

                else:
                    self._matcherstack.pop(-1)

# Some content.
            elif onemeta[KIND_TAG] == SPE_CONTENT_TAG:
                if not self._matcherstack:
                    raise ASTError("no block before, see line nb.{0}".format(
                        onemeta[NBLINE_TAG]))

# A good content ?
                if self.match(onemeta[CONTENT_TAG], self._matcherstack[-1]):
                    # We have to remove escaped character ``\::``.
                    if 'value_in_line' in self._groups_found:
                        value_in_line = self._groups_found['value_in_line']

                        if value_in_line.endswith("\::"):
                            value_in_line = value_in_line[:-3] + "::"
                            self._groups_found['value_in_line'] = value_in_line

                    onemeta[CONTENT_TAG] = self._groups_found

# We can add the metadatas.
            self.add(onemeta)

    def last_block_is_container(self):
        """
prototype::
    return = bool ;
             ``True`` or ``False`` whether the last block opened is or not a
             container
        """
        if self._matcherstack:
            return self._matcherstack[-1].mode == CONTAINER

        return True

# -- STORING THE METADATAS -- #

    def add(self, metadatas):
        self.view.write(metadatas)

    def add_partial(self, metadatas):
        self._partial_view.write(metadatas)

    def next_partial_meta(self):
        for x in self._partial_view:
            yield x

    def store_one_ctxt(self, ctxtinfos, not_add_groups_alone=True):
        metadatas = {
            KIND_TAG: ctxtinfos.kind,
            NBLINE_TAG: self._nbline,
        }

        if ctxtinfos.openclose:
            if ctxtinfos.openclose == AUTOCLOSE:
                metadatas[OPENCLOSE] = OPEN

            else:
                metadatas[OPENCLOSE] = ctxtinfos.openclose

        if ctxtinfos.verbatim:
            verbatimstart = self._groups_found.get(CONTENT_TAG, None)

            if verbatimstart is not None:
                del self._groups_found[CONTENT_TAG]

        else:
            verbatimstart = None

        if not_add_groups_alone and self._groups_found:
            metadatas[GRPS_FOUND_TAG] = self._groups_found

        if verbatimstart is not None:
            verbatimstart = {
                KIND_TAG: VERB_CONTENT_TAG,
                NBLINE_TAG: self._nbline,
                CONTENT_TAG: verbatimstart,
            }

# We have to keep extra indentations !
        if ctxtinfos.kind == SPE_CONTENT_TAG:
            if self._ctxts_opened_stack[-1].kind[:7] == "comment":
                extra = ""
                self._line = self._oldline

            elif self._levels_stack \
            and self._levels_stack[-1] != self.INFINITY \
            and self._level != self.INFINITY:
                if self._levels_stack \
                and self._level > self._levels_stack[-1]:
                    extra = " " * 4 * (self._level - self._levels_stack[-1] -
                                       1)

                else:
                    extra = " " * self._level

            else:
                extra = ""

            metadatas[CONTENT_TAG] = "{0}{1}".format(extra, self._line)

            if self._verbatim:
                metadatas[KIND_TAG] = VERB_CONTENT_TAG

# We must change emtylines in comment to a verbatim empty content.
        elif ctxtinfos.kind == EMPTYLINE_TAG and self._verbatim:
            metadatas[KIND_TAG] = VERB_CONTENT_TAG
            metadatas[CONTENT_TAG] = ""

        if verbatimstart and ctxtinfos.openclose == CLOSE:
            metadatas, verbatimstart = verbatimstart, metadatas

        if metadatas:
            self.add_partial(metadatas)

        if ctxtinfos.verbatim:
            if verbatimstart:
                self.add_partial(verbatimstart)

            if ctxtinfos.openclose == OPEN:
                self._verbatim = True

            elif ctxtinfos.openclose == CLOSE:
                self._verbatim = False

        if ctxtinfos.openclose == AUTOCLOSE:
            new_metadatas = {k: v for k, v in metadatas.items()}
            new_metadatas[OPENCLOSE] = CLOSE
            self.add_partial(new_metadatas)

# -- MAGIC METHOD -- #

    def __iter__(self):
        for x in self.view:
            yield x
Esempio n. 4
0
    def build(self):
# We build the AST view.
        self.ast = self.AST(
            mode    = self.mode,
            content = self.content
        )

        self.ast.build()

        if self.ast.view.mode == "list":
            self.walk_view = IOView(self.ast.view.mode)

        else:
            self.walk_view = IOView(
                mode = self.ast.view.mode,
                path = self.ast.view.datas.with_suffix(".walk")
            )

        with self.walk_view:
# -- START OF THE WALK -- #
            self.start()

# We must keep all metadatas for fine tuning in the attribut ``self.metadata``
# that contains all the necessary informations.
            self.incomment   = False
            self.indentlevel = -1

            self.last_mode   = ""
            self.modes_stack = []
            self.names_stack = []

            self.nb_empty_verbline = 0

            self.kv_nbline = -1
            lastkeyval     = {}

            for self.metadata in self.ast:
                # --- IMPORTANT : UGLY DEBUG --- #
                # print("--- @@@@@ ---", self.metadata,sep="\n");continue

                kind = self.metadata[KIND_TAG]
                self.nbline = self.metadata[NBLINE_TAG]


# -- COMMENT -- #

                if kind.startswith("comment-"):
                    if self.metadata[OPENCLOSE] == OPEN:
                        self.incomment = True

# Verbatim content is verbatim !!!!
                        if self.last_mode == VERBATIM:
                            self._add_empty_verbline()

                        self.open_comment(kind[8:])

                    else:
                        self.incomment = False
                        self.close_comment(kind[8:])


# -- COMMENT LINE -- #

                elif kind == VERB_CONTENT_TAG:
                    self.content_in_comment(self.metadata[CONTENT_TAG])


# -- EMPTY LINE -- #

                elif kind == EMPTYLINE_TAG:
                    if self.incomment:
                        self.content_in_comment("")

                    elif self.last_mode == VERBATIM:
                        self.nb_empty_verbline += 1


# -- BLOCK -- #

                elif kind == BLOCK_TAG:
# An opening block
                    if self.metadata[OPENCLOSE] == OPEN:
                        self.indentlevel += 1

                        self.last_mode = self.metadata[MODE_TAG]
                        self.modes_stack.append(self.last_mode)

                        name = self.metadata[GRPS_FOUND_TAG][NAME_TAG]
                        self.names_stack.append(name)
                        self.open_block(name)

# For block with a content, we have to augment the value of the indentation.
                        if self.last_mode != CONTAINER:
                            self.indentlevel += 1

# We have to manage key-value modes fo which a value can be written over
# several lines !
                        if self.last_mode.endswith(KEYVAL):
                            lastkeyval = {}
                            keysused   = []

# A closing block
                    else:
                        if self.last_mode == VERBATIM:
                            self.nb_empty_verbline = 0

                        name = self.names_stack.pop(-1)

# Do we have a key-value couple ?
                        if lastkeyval:
                            self.add_keyval(lastkeyval)
                            lastkeyval = {}
                            keysused   = []
                            self.indentlevel -= 1

# We have to take care of last comments in a block
                            self.kv_nbline = float("inf")
                            self.close_block(name)
                            self.kv_nbline = -1

                        else:
# Are we closing a block with a content ?
                            if self.last_mode != CONTAINER:
                                self.indentlevel -= 1

                            self.close_block(name)

                        self.indentlevel -= 1
                        self.modes_stack.pop(-1)

                        if self.modes_stack:
                            self.last_mode = self.modes_stack[-1]
                        else:
                            self.last_mode = ""


# -- MAGIC COMMENT -- #

                elif kind == MAGIC_COMMENT:
                    if self.last_mode != VERBATIM:
                        raise PeufError(
                            "magic comment not used for a verbatim content"
                        )

                    if self.metadata[OPENCLOSE] == OPEN:
                        self._add_empty_verbline()
                        self.add_magic_comment()


# -- VERBATIM CONTENT -- #

                elif self.last_mode == VERBATIM:
                    self._add_empty_verbline()
                    self.add_line(self.metadata[CONTENT_TAG][VAL_IN_LINE_TAG])


# -- KEY-VAL CONTENT -- #

                else:
                    content = self.metadata[CONTENT_TAG]

                    if VAL_IN_LINE_TAG in content:
                        if not lastkeyval:
                            raise PeufError(
                                "missing first key, see line #{0}".format(
                                    self.metadata[NBLINE_TAG]
                                )
                            )

                        lastkeyval[VAL_TAG] \
                        += " " + content[VAL_IN_LINE_TAG].strip()
                        self.kv_nbline = self.metadata[NBLINE_TAG]

                    else:
                        if lastkeyval:
                            self.add_keyval(lastkeyval)

                        self.kv_nbline = self.metadata[NBLINE_TAG]
                        key            = content[KEY_TAG]

                        if self.last_mode == KEYVAL and key in keysused:
                            raise PeufError(
                                "key already used, see line #{0}".format(
                                    self.metadata[NBLINE_TAG]
                                )
                            )

                        keysused.append(key)

                        lastkeyval = content


# -- END OF THE WALK -- #

            self.end()

        self.builddone = True

        return self
Esempio n. 5
0
class WalkInAST():
    """
prototype::
    see = parse.ast.AST

    arg-attr = pathlib.Path, str: content ;
               see the documentation of ``parse.ast.AST``
    arg-attr = str, dict: mode ;
               see the documentation of ``parse.ast.AST``
    arg-attr = str: encoding = "utf-8" ;
               see the documentation of ``parse.ast.AST``
    arg-attr = bool: build_asts = True ;
               **this variable is only useful for a content in a file.**
               ``build_asts = True`` indicates to analyse a file and to produce
               temporary files, whereas ``build_asts = False`` asks to use
               the temporary files (this is a way to store physically the
               partial analysis)
    arg-attr = bool: remove_asts = True ;
               **this variable is only useful for a content in a file.**
               ``remove_asts = True`` indicates to remove temporary files built
               to analyze a file, whereas ``remove_asts = False`` asks to keep
               the temporary files (this is a way to store physically the
               partial analysis)

    attr = orpyste.tools.ioview.IOView: walk_view ;
           this is the attribute to use if you want to store information
           during the walk.
    attr = str: last_mode ;
           this string is the mode of the very last block opened
    attr = list: modes_stack ;
           this stack list contains the modes of the last blocks opened
    attr = dict: metadata ;
           this is a dictionary sent when using ``for metadata in oneast: ...``
           where ``oneast`` is an instance of ``parse.ast.AST``. This gives you
           all informations about the current piece of the AST.


warning::
    This class only implements the walking but it doesn't achieve any action.
    To do something, you have to subclass ``WalkInAST`` and to implement what
    you need in the following methods (see their documentations for more
    informations and also the class ``orpyste.data.Read`` for one real example
    of use).

        * ``start`` and ``end`` are methods called just before and after the
        walk.

        * ``open_comment`` and ``close_comment`` are called when a comment has
        to be opened or closed, whereas ``content_in_comment`` allows to add a
        content met inside a comment.

        * ``open_block`` and ``close_block`` are methods called just before and
        after a block is opened or closed respectively.

        * ``add_keyval`` can add a key-separator-value data.

        * ``add_line`` allows to add a single verbatim line.
    """
# The class used to build the AST view: a subclass can use another one.
    AST = AST

    def __init__(
        self,
        content,
        mode,
        encoding    = "utf-8",
        build_asts  = True,
        remove_asts = True
    ):
        """
The arguments are only stored (see the documentation of the class): nothing
is built before a call to the method ``build`` or the use of the instance as
a context manager.
        """
        self.content  = content
        self.mode     = mode
        self.encoding = encoding

        self.build_asts  = build_asts
        self.remove_asts = remove_asts

# This flag is set to ``True`` at the end of the method ``build``.
        self.builddone = False


    def build(self):
        """
prototype::
    action = the AST view of ``self.content`` is built using ``self.AST``,
             and then this view is walked: each metadata met is stored in
             ``self.metadata`` and, regarding its kind, one of the methods
             ``open_comment``, ``close_comment``, ``content_in_comment``,
             ``open_block``, ``close_block``, ``add_keyval``, ``add_line`` or
             ``add_magic_comment`` is called (``start`` and ``end`` are
             called just before and just after the walk)


This method returns ``self``, and it raises a ``PeufError`` if a magic comment
is met outside a verbatim content, if a value is given before any key in a
key-value content, or if a key is used twice in a block of mode ``KEYVAL``.
        """
# We build the AST view.
        self.ast = self.AST(
            mode    = self.mode,
            content = self.content
        )

        self.ast.build()

# The walk view uses the same mode as the AST view. When this mode is not
# "list", the path of the walk view is built from ``self.ast.view.datas`` by
# changing its suffix to ``.walk``.
        if self.ast.view.mode == "list":
            self.walk_view = IOView(self.ast.view.mode)

        else:
            self.walk_view = IOView(
                mode = self.ast.view.mode,
                path = self.ast.view.datas.with_suffix(".walk")
            )

        with self.walk_view:
# -- START OF THE WALK -- #
            self.start()

# We must keep all metadatas for fine tuning in the attribute ``self.metadata``
# that contains all the necessary informations.
            self.incomment   = False
            self.indentlevel = -1

            self.last_mode   = ""
            self.modes_stack = []
            self.names_stack = []

            self.nb_empty_verbline = 0

            self.kv_nbline = -1

# ``lastkeyval`` is the key-value data not yet sent to ``add_keyval`` (its
# value can be continued on the following lines), and ``keysused`` stores the
# keys already met in the current block. Both are defined before the loop so
# that they are never unbound when a line of content is analyzed. A set is
# used for ``keysused`` because it is only used for membership tests.
            lastkeyval = {}
            keysused   = set()

            for self.metadata in self.ast:
                kind        = self.metadata[KIND_TAG]
                self.nbline = self.metadata[NBLINE_TAG]


# -- COMMENT -- #

# ``kind[8:]`` is the kind of the comment without the prefix "comment-".
                if kind.startswith("comment-"):
                    if self.metadata[OPENCLOSE] == OPEN:
                        self.incomment = True

# Verbatim content is verbatim: the empty lines counted just before the
# comment have to be added first.
                        if self.last_mode == VERBATIM:
                            self._add_empty_verbline()

                        self.open_comment(kind[8:])

                    else:
                        self.incomment = False
                        self.close_comment(kind[8:])


# -- COMMENT LINE -- #

                elif kind == VERB_CONTENT_TAG:
                    self.content_in_comment(self.metadata[CONTENT_TAG])


# -- EMPTY LINE -- #

# In a verbatim block, empty lines are only counted: they will be added later
# if another content follows them (see ``self._add_empty_verbline``).
                elif kind == EMPTYLINE_TAG:
                    if self.incomment:
                        self.content_in_comment("")

                    elif self.last_mode == VERBATIM:
                        self.nb_empty_verbline += 1


# -- BLOCK -- #

                elif kind == BLOCK_TAG:
# An opening block
                    if self.metadata[OPENCLOSE] == OPEN:
                        self.indentlevel += 1

                        self.last_mode = self.metadata[MODE_TAG]
                        self.modes_stack.append(self.last_mode)

                        name = self.metadata[GRPS_FOUND_TAG][NAME_TAG]
                        self.names_stack.append(name)
                        self.open_block(name)

# For block with a content, we have to augment the value of the indentation.
                        if self.last_mode != CONTAINER:
                            self.indentlevel += 1

# We have to manage key-value modes for which a value can be written over
# several lines !
                        if self.last_mode.endswith(KEYVAL):
                            lastkeyval = {}
                            keysused   = set()

# A closing block
                    else:
                        if self.last_mode == VERBATIM:
                            self.nb_empty_verbline = 0

                        name = self.names_stack.pop()

# Do we have a key-value couple not yet added ?
                        if lastkeyval:
                            self.add_keyval(lastkeyval)
                            lastkeyval = {}
                            keysused   = set()
                            self.indentlevel -= 1

# We have to take care of last comments in a block
                            self.kv_nbline = float("inf")
                            self.close_block(name)
                            self.kv_nbline = -1

                        else:
# Are we closing a block with a content ?
                            if self.last_mode != CONTAINER:
                                self.indentlevel -= 1

                            self.close_block(name)

                        self.indentlevel -= 1
                        self.modes_stack.pop()

# The current mode becomes the one of the enclosing block, if there is one.
                        if self.modes_stack:
                            self.last_mode = self.modes_stack[-1]

                        else:
                            self.last_mode = ""


# -- MAGIC COMMENT -- #

                elif kind == MAGIC_COMMENT:
                    if self.last_mode != VERBATIM:
                        raise PeufError(
                            "magic comment not used for a verbatim content"
                        )

                    if self.metadata[OPENCLOSE] == OPEN:
                        self._add_empty_verbline()
                        self.add_magic_comment()


# -- VERBATIM CONTENT -- #

                elif self.last_mode == VERBATIM:
                    self._add_empty_verbline()
                    self.add_line(self.metadata[CONTENT_TAG][VAL_IN_LINE_TAG])


# -- KEY-VAL CONTENT -- #

                else:
                    content = self.metadata[CONTENT_TAG]

# A line without any key: this is the continuation of the last value.
                    if VAL_IN_LINE_TAG in content:
                        if not lastkeyval:
                            raise PeufError(
                                "missing first key, see line #{0}".format(
                                    self.metadata[NBLINE_TAG]
                                )
                            )

                        lastkeyval[VAL_TAG] \
                        += " " + content[VAL_IN_LINE_TAG].strip()
                        self.kv_nbline = self.metadata[NBLINE_TAG]

# A new key: the previous key-value couple is now complete.
                    else:
                        if lastkeyval:
                            self.add_keyval(lastkeyval)

                        self.kv_nbline = self.metadata[NBLINE_TAG]
                        key            = content[KEY_TAG]

                        if self.last_mode == KEYVAL and key in keysused:
                            raise PeufError(
                                "key already used, see line #{0}".format(
                                    self.metadata[NBLINE_TAG]
                                )
                            )

                        keysused.add(key)

                        lastkeyval = content


# -- END OF THE WALK -- #

            self.end()

        self.builddone = True

        return self


# -- START AND END OF THE WALK -- #

    def start(self):
        """
This method is called just before the walk starts.
        """
        ...

    def end(self):
        """
This method is called just after the end of the walk.
        """
        ...

    def remove_extras(self):
        """
This method calls the method ``remove`` of the AST view and of the walk view.
Both views are created by the method ``build`` which must have been called
before.
        """
        self.ast.view.remove()
        self.walk_view.remove()


# -- COMMENTS -- #

    def open_comment(self, kind):
        """
prototype::
    arg = str: kind in ["singleline", "multilines", "multilines-singleline"] ;
          ``kind = "singleline"`` is for orpyste::``// ...``,
          ``kind = "multilines"`` is for orpyste::``/* ... */`` where the
          content contains at least one back return, and
          ``kind = "multilines-singleline"`` is for orpyste::``/* ... */``
          which is all in a single line


This method is for opening a comment. No content is given there (see the method
``content_in_comment``).
        """
        ...

    def close_comment(self, kind):
        """
prototype::
    arg = str: kind in ["singleline", "multilines", "multilines-singleline"] ;
          ``kind = "singleline"`` is for orpyste::``// ...``,
          ``kind = "multilines"`` is for orpyste::``/* ... */`` where the
          content contains at least one back return, and
          ``kind = "multilines-singleline"`` is for orpyste::``/* ... */``
          which is all in a single line


This method is for closing a comment. No content is given there (see the method
``content_in_comment``).
        """
        ...

    def content_in_comment(self, line):
        """
prototype::
    arg = str: line


This method is for adding content inside a comment (see the methods
``open_comment`` and ``close_comment``).
        """
        ...


# -- BLOCKS -- #

    def open_block(self, name):
        """
prototype::
    arg = str: name


This method is for opening a new block knowing its name.
        """
        ...

    def close_block(self, name):
        """
prototype::
    arg = str: name


This method is for closing a block knowing its name.
        """
        ...


# -- (MULTI)KEYVAL -- #

    def add_keyval(self, keyval):
        """
prototype::
    arg = {"key": str, "sep": str, "value": str}: keyval


This method is for adding a new key with its associated value and separator.
All these informations are in the dictionary ``keyval``.
        """
        ...


# -- VERBATIM -- #

# We have to take care of the last empty lines !!!
    def _add_empty_verbline(self):
        """
This method adds, via ``add_line("")``, the empty lines counted in
``self.nb_empty_verbline``, and then it resets this counter. During these
calls, ``self.nbline`` is temporarily moved back so that it increases by one
for each empty line added; it gets its initial value back at the end.
        """
        if self.nb_empty_verbline:
# We go back just before the first empty line counted.
            self.nbline -= self.nb_empty_verbline + 1

            for _ in range(self.nb_empty_verbline):
                self.nbline += 1
                self.add_line("")

# We come back to the number of the current line.
            self.nbline += 1
            self.nb_empty_verbline = 0

    def add_line(self, line):
        """
prototype::
    arg = str: line


This method is for adding verbatim content.
        """
        ...

    def add_magic_comment(self):
        """
This method is for adding the magic comment used for empty lines at the end of
verbatim contents.
        """
        ...


# -- CONTEXT MANAGER -- #

    def __enter__(self):
        """
This method calls ``build`` if needed, and then it returns the instance.
        """
# We have to always build asts if the content is a string !
#
# NOTE(review): the test below is true for any content which is NOT a string,
# so ``build_asts = False`` is only obeyed for a string content. This looks
# like the opposite of what the comment above and the documentation of
# ``build_asts`` say. The behavior has been kept as it is - TODO confirm.
        if self.build_asts \
        or not isinstance(self.content, str):
            self.build()

        return self

    def __exit__(self, type, value, traceback):
        """
This method removes the views built if ``self.remove_asts`` is ``True``. The
informations about a possible exception are not used, and nothing is returned,
so an exception raised inside the ``with`` block is not suppressed.
        """
        if self.remove_asts:
            self.remove_extras()
Esempio n. 6
0
class AST():
    """
prototype::
    see = Mode, CtxtInfos, ContentInfos

    arg-attr = pathlib.Path, str: content ;
               ``content`` can be an instance of the class ``pathlib.Path``,
               that is a file given using its path, or ``content`` can be a
               string with all the content to be analyzed (see the attribut
               ``view``)
    arg-attr = str, dict: mode ;
               an ¨orpyste mode that can use different kinds of syntax (see the
               documentation of the class ``Mode``)
    arg-attr = str: encoding = "utf-8" ;
               a well named argument...


    attr = file, io.StringIO: view ;
           this attribut contains a verbose and easy to read version of the
           abstract syntax tree in either a pickle file if the argument attribut
           ``content`` is a ``pathlib.Path``, or a ``io.StringIO`` if the
           argument attribut ``content`` is a string

    method = build ;
             you have to call this method each time you must build, or rebuild,
             the abstract syntax tree


This class can build an Abstract Syntax Tree (AST) view of a merely ¨orpyste
file. We have written "merely" because here we allow some semantic illegal
¨peuf syntaxes. This will the job of ``parse.Walk`` to manage this kind of
errors among some other ones.


Here is a very simple example showing how to build the AST view and how to walk
in this view.

pyterm:
    >>> from orpyste.parse.ast import AST
    >>> content = '''
    ... test::
    ...     Missing a key-val first !
    ...     a = 3
    ... '''.strip()
    >>> mode = {
    ...     'container': ":default:",
    ...     'keyval::=': "test",
    ...     'verbatim' : "summary"
    ... }
    >>> ast = AST(content = content, mode = mode)
    >>> ast.build()
    >>> from pprint import pprint # For pretty printings of dictionaries.
    >>> for metadata in ast:
    ...     pprint(metadata)
    {'groups_found': {'name': 'test'},
     'kind': 'block',
     'nbline': 1,
     'mode': 'keyval',
     'openclose': 'open'}
    {'content': {'value_in_line': 'Missing a key-val first !'},
     'kind': ':content:',
     'nbline': 2}
    {'content': {'key': 'a', 'sep': '=', 'value': '3'},
     'kind': ':content:',
     'nbline': 3}
    {'kind': 'block', 'nbline': 3, 'openclose': 'close'}


warning::
    This class does not do any semantic analysis as we can see in the example
    where the content of the block orpyste::``test`` starts with an inline value
    instead of a key-value one. This will the job of ``parse.Walk`` to manage
    semantic problems.
    """
# CONFIGURATIONS OF THE CONTEXTS [human form]
#
# The CTXTS_CONFIGS are sorted from the first to be tested to the last one.

    CLOSED_BY_INDENT_ID, CLOSED_AT_END_ID, VERBATIM_ID = range(3)

# If the two following key are not used, this will means "use all possible
# contexts inside me". The name of the context cannot look like ``:onename:``
# with double points.
    SUBCTXTS       = "subcontexts"
    INFINITY_LEVEL = "inf-level"

    CTXTS_CONFIGS = OrderedDict()

# The missing ``CLOSE`` indicates an auto-close context.
#
# << Warning ! >> The group name ``content`` indicates to put matching in a
# content line like context.
    CTXTS_CONFIGS[MAGIC_COMMENT] = {
        OPEN          : "^////$",
        INFINITY_LEVEL: True,          # This allows to force the level.
        SUBCTXTS      : VERBATIM_ID    # This indicates no subcontext.
    }

    CTXTS_CONFIGS[COMMENT_SINGLELINE] = {
        OPEN          : "^//(?P<content>.*)$",
        INFINITY_LEVEL: True,
        SUBCTXTS      : VERBATIM_ID
    }

    CTXTS_CONFIGS[COMMENT_MULTILINES_SINGLELINE] = {
        OPEN          : "^/\*(?P<content>.*)\*/[ \t]*$",
        INFINITY_LEVEL: True,
        SUBCTXTS      : VERBATIM_ID
    }

    CTXTS_CONFIGS[COMMENT_MULTILINES] =  {
        OPEN            : "^/\*(?P<content>.*)$",
        CLOSE           : "^(?P<content>.*)\*/[ \t]*$",
        SUBCTXTS        : VERBATIM_ID,
        INFINITY_LEVEL  : True,
        CLOSED_AT_END_ID: True
    }

# ``CLOSE: CLOSED_BY_INDENT_ID`` indicates a context using indentation for its
# content.
#
# We can use tuple to indicate several patterns, and we can also use a special
# keyword ``not::`` for negate a regex (doing this in pure regex can be very
# messy).
    CTXTS_CONFIGS[BLOCK_TAG] = {
        OPEN: (
            "^(?P<name>[\d_a-zA-Z]+)::$",
            "not::^[\d_a-zA-Z]+\\\\::$"
        ),
        CLOSE           : CLOSED_BY_INDENT_ID,
        CLOSED_AT_END_ID: True
    }

    SPACES_PATTERN = "[ \\t]*"
    LINE_PATTERN   = "^.*$"

    KEY_GRP_PATTERN   = "(?P<key>.*?)"
    VALUE_GRP_PATTERN = "(?P<value>.*)"


    def __init__(
        self,
        content,
        mode,
        encoding = "utf-8"
    ):
# User's arguments.
        self.content  = content
        self.mode     = mode
        self.encoding = encoding

# Let's build our contexts' rules.
        self.build_ctxts_rules()
        self.build_contents_rules()


# -- SPECIAL SETTERS -- #

    @property
    def content(self):
        return self._content

    @content.setter
    def content(self, value):
        if isinstance(value, str):
            self._content      = value
            self._partial_view = IOView("list")
            self.view          = IOView("list")

        elif isinstance(value, Path):
            self._content = value

            self._partial_view = IOView(
                mode = "pickle",
                path = value.with_suffix(
                    "{0}.orpyste.partial.ast".format(value.suffix)
                )
            )

            self.view = IOView(
                mode = "pickle",
                path = value.with_suffix(
                    "{0}.orpyste.ast".format(value.suffix)
                )
            )

        else:
            raise TypeError("invalid type for the attribut ``content``.")


    @property
    def mode(self):
        return self._mode

    @mode.setter
    def mode(self, value):
        self._mode = Mode(value)


# -- INTERNAL CONTEXTS' RULES -- #

    def build_ctxts_rules(self):
        """
prototype::
    action = this method builds ¨python none human lists and dictionaries used
             to build an intermediate abstract syntax tree of the contexts
             which are either opening or closing blocks or comments, or empty
             lines, or lines of contents (you can breath now).
             This will be the job of ``self.build_contents_rules`` to take care
             of lines of contents.
        """
# MATCHERS FOR THE CONTEXTS [the E.T. experience]
#
# We build ¨python none human list for research with the following constraints.
#
#     1) We test all the open contexts and then the close ones.
#     2) We stop as soon as we find a winning matching.
#     3) We have to take care of subcontexts.
#     4) We store the regex objects in a list (think about the subcontexts).
#
# << Warning ! >> We add a matcher for empty line at the very beginning because
# we want to keep them but we have also have to skip them when searching for
# contexts.
        self.MATCHERS = [{
            True:                   # Boolean wanted.
            [re.compile("^$")]      # Liste or regexes to test.
        }]

        self.CTXTINFOS_EMPTYLINE = CtxtInfos(
            kind       = EMPTYLINE_TAG,
            id_matcher = 0    # See ``self.MATCHERS``.
        )

        self.CTXTINFOS_CONTENT = CtxtInfos(kind = SPE_CONTENT_TAG)

        self.CTXTS_MATCHERS = [self.CTXTINFOS_EMPTYLINE]

        self.CTXTS_KINDS_SUBCTXTS = {}

        self.INFINITY                    = float('inf')
        self.CTXTS_KINDS_WITH_INF_LEVELS = set()

        self.CTXTS_KINDS_CLOSED_AT_END = set()

        id_matcher = len(self.MATCHERS) - 1
        name2id    = {}

        for openclose in [OPEN, CLOSE]:
            for kind, configs in self.CTXTS_CONFIGS.items():
                if openclose in configs:
                    spec = configs[openclose]

# We do not keep the special keyword CLOSED_BY_INDENT_ID.
                    if openclose == CLOSE \
                    and spec == self.CLOSED_BY_INDENT_ID:
                        continue

# We manage other cases.
                    if isinstance(spec, (str, int)):
                        spec = [spec]

                    matcher    = {}
                    regex_grps = []

# A regex pattern.
                    for s in spec:
                        if s.startswith("not::"):
                            boolwanted = False
                            s = s[5:]

                        else:
                            boolwanted = True

                        pattern = re.compile(s)

# Do we have regex groups ?
                        regex_grps += [x for x in pattern.groupindex]

# We add a new regex.
                        if boolwanted in matcher:
                            matcher[boolwanted].append(pattern)

                        else:
                            matcher[boolwanted] = [pattern]

                    id_matcher += 1
                    self.MATCHERS.append(matcher)

                    _openclose = openclose

                    if CLOSE in configs:
                        if configs[CLOSE] == self.CLOSED_BY_INDENT_ID:
                            indented = True

                        else:
                            indented = False

                    else:
                        _openclose = AUTOCLOSE
                        indented    = False

                    if configs.get(self.CLOSED_AT_END_ID, False):
                        self.CTXTS_KINDS_CLOSED_AT_END.add(kind)

                    verbatim = (
                        self.SUBCTXTS in configs
                        and
                        configs[self.SUBCTXTS] == self.VERBATIM_ID
                    )

                    self.CTXTS_MATCHERS.append(
                        CtxtInfos(
                            kind       = kind,
                            openclose  = _openclose,
                            indented   = indented,
                            id_matcher = id_matcher,
                            regex_grps = regex_grps,
                            verbatim   = verbatim
                        )
                    )

                    name2id[(openclose, kind)] = id_matcher

# SUBCONTEXTS AND CONTEXT'S LEVEL
        for kind, configs in self.CTXTS_CONFIGS.items():
            if self.INFINITY_LEVEL in configs:
                self.CTXTS_KINDS_WITH_INF_LEVELS.add(kind)

            if self.SUBCTXTS in configs:
# Empty lines can appear anywhere !
                subctxts = [(
                    self.CTXTINFOS_EMPTYLINE.openclose,
                    self.CTXTINFOS_EMPTYLINE.kind
                )]

                if configs[self.SUBCTXTS] == self.VERBATIM_ID:
                    if (CLOSE, kind) in name2id:
                        subctxts.append((CLOSE, kind))

                else:
                    for kind in configs[self.SUBCTXTS]:
                        for openclose in [OPEN, CLOSE]:
                            if (openclose, kind) in name2id:
                                subctxts.append((openclose, kind))

                self.CTXTS_KINDS_SUBCTXTS[kind] = subctxts


# -- INTERNAL CONTENTS' RULES -- #

    def build_contents_rules(self):
        """
prototype::
    action = this method builds ¨python none human lists and dictionaries used
             to build from the intermediate abstract syntax tree of the
             contexts the final abstract syntax tree where the lines of contents
             have been analyzed.
        """
# Configurations of the patterns for datas in contexts
        self.CONTENTS_MATCHERS = {}

        id_matcher = len(self.MATCHERS)

# For the "verbatim" mode.
        self.MATCHERS.append({True: [re.compile("^(?P<value_in_line>.*)$")]})
        id_verbatim = id_matcher

# Let's work !
        for ctxt, configs in self.mode.items():
# "keyval" or "multikeyval" modes.
            if configs[MODE_TAG] in [KEYVAL, MULTIKEYVAL]:
# We must take care of separators with several characters, and we also have to
# escape special characters.
                seps = []

                for onesep in configs[SEPS_TAG]:
                    if len(onesep) != 1:
                        onesep = "({0})".format(re.escape(onesep))

                    else:
                        onesep = re.escape(onesep)

                    seps.append(onesep)

                pattern = re.compile(
                    "{spaces}{key}{spaces}(?P<sep>{seps}){spaces}{value}"
                        .format(
                            spaces = self.SPACES_PATTERN,
                            key    = self.KEY_GRP_PATTERN,
                            value  = self.VALUE_GRP_PATTERN,
                            seps   = "|".join(seps)
                        )
                )

                self.MATCHERS.append({True: [pattern]})
                id_matcher += 1

# Do we have regex groups ?
                regex_grps = [x for x in pattern.groupindex]

                self.CONTENTS_MATCHERS[ctxt] = ContentInfos(
                    mode       = configs[MODE_TAG],
                    id_matcher = [id_matcher, id_verbatim],
                    regex_grps = regex_grps,
                )

# "verbatim" and "container" modes.
            elif configs[MODE_TAG] in [VERBATIM, CONTAINER]:
                self.CONTENTS_MATCHERS[ctxt] = ContentInfos(
                    mode       = configs[MODE_TAG],
                    id_matcher = id_verbatim
                )

# Mode not implemented.
            else:
                raise ValueError(
                    "BUG to report : mode ``{0}`` not implemented".format(
                        configs[MODE_TAG]
                    )
                )

# -- WALKING IN THE CONTENT -- #

    def nextline(self):
        """
property::
    yield = str ;
            each line of ``self.content``.
        """
        if isinstance(self._content, str):
            for line in StringIO(self._content):
                self._nbline += 1
                yield line.rstrip()

        else:
            with self._content.open(
                mode     = "r",
                encoding = self.encoding
            ) as peuffile:
                for line in peuffile:
                    self._nbline += 1
                    yield line.rstrip()


# -- INDENTATION -- #

    def manage_indent(self):
        """
property::
    action = the level of indention is calculated and the leading indentation
             of ``self._line`` is removed (one tabulation is exactly equal to
             four spaces).
        """
        if self._line \
        and self._level != self.INFINITY:
            self._level = 0

            for char in self._line:
                if char == ' ':
                    self._level += 1

                elif char == '\t':
                    self._level += 4

                else:
                    break

            self._line = " "*(self._level % 4) + self._line.lstrip()
            self._level //= 4


# -- REGEXES -- #

    def match(self, text, infos):
        """
property::
    arg = str: text ;
          this string is a text where we look for some metadatas (a context or
          a data content)
    arg = CtxtInfos, ContentInfos: infos ;
          this indicates which matcher must be used to test a matching on the
          argument ``text``

    return = bool ;
             ``True`` or ``False`` whether something matches or not
        """
        for oneid in infos.id_matcher:
            match_found = True
            self._groups_found = {}

# Looking for the first winning matching.
            for boolwanted, thematchers \
            in self.MATCHERS[oneid].items():
                for onematcher in thematchers:
                    search = onematcher.search(text)

                    if bool(search) != boolwanted:
                        match_found = False
                        break

# Do we have groups to stored ?
                    elif search:
                        self._groups_found.update(search.groupdict())

                if match_found is False:
                    break

            if match_found is True:
                break

# We have a winning mathcing.
        return match_found


# -- BUILD THE AST -- #

    def build(self):
        """
prototype::
    action = this method calls all the methods needed so as to build the
             abstract syntax tree.
        """
# Internal attributs
        self._nbline = 0
        self._line   = None

        self._level              = 0
        self._levels_stack       = []
        self._ctxts_stack        = []
        self._ctxt_sbctxts_stack = []

# Intermediate AST only for contexts.
        with self._partial_view:
            self._verbatim = False

            for line in self.nextline():
                self._line = line
                self.manage_indent()
                self.search_ctxts()

            self.close_ctxt_at_end()


# Final AST with datas in contents.
        with self.view:
            self.search_contents()


# The partial view is not usefull in the idsk.
        self._partial_view.remove()


# -- LOOKING FOR CONTEXTS -- #

    def search_ctxts(self):
        """
prototype::
    action = this method looks for contexts which can be either opening or
             closing blocks or comments, or empty lines, or lines of contents.
        """
        nocontextfound = True

        for ctxtinfos in self.CTXTS_MATCHERS:
# Not a subcontext ?
            if self._ctxt_sbctxts_stack \
            and (
                ctxtinfos.openclose,
                ctxtinfos.kind
            ) not in self._ctxt_sbctxts_stack[-1]:
                continue

# A new context.
            if self.match(self._line, ctxtinfos):
                nocontextfound = False

# Level can be forced to infinity.
                if ctxtinfos.kind in self.CTXTS_KINDS_WITH_INF_LEVELS \
                and ctxtinfos.openclose != AUTOCLOSE:
                    self._level = self.INFINITY

# A new opening context.
                if ctxtinfos.openclose == OPEN:
                    self._ctxts_stack.append(ctxtinfos)

# Do we have to use subcontexts ?
                    if ctxtinfos.kind in self.CTXTS_KINDS_SUBCTXTS:
                        self._ctxt_sbctxts_stack.append(
                            self.CTXTS_KINDS_SUBCTXTS[ctxtinfos.kind]
                        )

# A closing context.
                elif ctxtinfos.openclose == CLOSE:
                    if not self._ctxts_stack:
                        raise ASTError(
                            "wrong closing context: see line #{0}".format(
                                self._nbline
                            )
                        )

                    lastctxt = self._ctxts_stack.pop(-1)

                    if lastctxt.kind != ctxtinfos.kind:
                        raise ASTError(
                            "wrong closing context: " \
                            + "see line no.{0} and context \"{1}\"".format(
                                self._nbline, ctxtinfos.kind
                            )
                        )

                    self._ctxt_sbctxts_stack.pop(-1)

                break

# Not a visible new context (be careful of indentation closing)
        if nocontextfound:
            ctxtinfos = self.CTXTINFOS_CONTENT

# We can store the new and eventually close some old contexts.
        self.close_indented_ctxts(ctxtinfos)
        self.store_one_ctxt(ctxtinfos)


    def must_close_indented_ctxt(self):
        """
prototype::
    return = bool ;
             ``True`` or ``False`` whether we have to close or not the actual
             context due to the indentation
        """
        if self._levels_stack:
            return self._level <= self._levels_stack[-1]

        return False


    def close_indented_ctxts(self, ctxtinfos):
        """
prototype::
    arg = CtxtInfos: ctxtinfos ;
          the context that has just been found in the current line

    action = this method closes all contexts that use indentation for their
             content and that must be closed regarding the current level, and
             then it updates the stack of levels, and sometimes the current
             level, regarding the kind of opening or closing of ``ctxtinfos``.
        """
        # Empty lines, autoclosed context or context with infinite level are
        # the only contexts that can't close an indented context.
        if ctxtinfos != self.CTXTINFOS_EMPTYLINE \
        and ctxtinfos.openclose != AUTOCLOSE \
        and self._level != self.INFINITY:
            if self._levels_stack \
            and self._levels_stack[-1] != self.INFINITY:
                # The two stacks are popped together, and a closing context
                # is stored for each context closed by the indentation.
                while self.must_close_indented_ctxt():
                    self._levels_stack.pop(-1)

                    lastctxt = self._ctxts_stack.pop(-1)

                    # Be careful ! Despite the alignment used just after,
                    # ``not_add_groups_alone`` is an argument of
                    # ``store_one_ctxt`` and not of ``CtxtInfos``.
                    self.store_one_ctxt(
                        CtxtInfos(
                            kind                 = lastctxt.kind,
                            openclose            = CLOSE),
                            not_add_groups_alone = False
                    )

        # We update the stack of levels: the level of an opening context is
        # pushed if it differs from the last one stored, otherwise the stack
        # is replaced by a stack containing only the current level.
        if ctxtinfos.openclose == OPEN:
            if self._levels_stack \
            and self._level != self._levels_stack[-1]:
                self._levels_stack.append(self._level)

            else:
                self._levels_stack = [self._level]

        # Autoclose context with infinite level do not change the levels: the
        # current level is reset to the last one stored.
        elif ctxtinfos.openclose == AUTOCLOSE \
        and self._levels_stack \
        and self._level == self.INFINITY:
            self._level = self._levels_stack[-1]

        # Close context with infinite level need to clean the stack of levels:
        # the last level is removed, and the current level becomes the new
        # last level, or zero if the stack is now empty.
        elif ctxtinfos.openclose == CLOSE \
        and self._levels_stack \
        and self._level == self.INFINITY:
            self._levels_stack.pop(-1)

            if self._levels_stack:
                self._level = self._levels_stack[-1]

            else:
                self._level = 0


    def close_ctxt_at_end(self):
        """
prototype::
    action = this method closes all contexts than can be closed automatically
             at the very end of the ¨orpyste file
        """
        while self._ctxts_stack:
            lastctxt_kind = self._ctxts_stack.pop(-1).kind

            if lastctxt_kind not in self.CTXTS_KINDS_CLOSED_AT_END:
                raise ASTError(
                    "unclosed context: " \
                    + "see line no.{0} and context \"{1}\"".format(
                        self._nbline, lastctxt_kind
                    )
                )

            self.store_one_ctxt(
                CtxtInfos(kind = lastctxt_kind, openclose = CLOSE)
            )


# -- LOOKING FOR DATAS IN CONTENTS -- #

    def search_contents(self):
        """
prototype::
    action = this method looks for datas in contents regarding the mode of the
             blocks.
        """
        _defaultmatcher     = self.CONTENTS_MATCHERS.get(DEFAULT, None)
        self._matcherstack  = []
        self._nb_emptylines = 0

        for onemeta in self.next_partial_meta():
# The big messe of empty lines in verbatim content.
# One new block.
            if onemeta[KIND_TAG] == BLOCK_TAG:
                if onemeta[OPENCLOSE] == OPEN:
# Preceding block must be a container !
                    if not self.last_block_is_container():
                        raise ASTError(
                            "last block not a container, see line nb.{0}" \
                                .format(onemeta[NBLINE_TAG])
                        )

                    matcher = self.CONTENTS_MATCHERS.get(
                        onemeta[GRPS_FOUND_TAG][NAME_TAG],
                        _defaultmatcher
                    )

                    if not matcher:
                        raise ASTError(
                            "last block << {0} >> is illegal, see line nb.{1}" \
                                .format(
                                    onemeta[GRPS_FOUND_TAG][NAME_TAG],
                                    onemeta[NBLINE_TAG]
                                )
                        )

# We must know the mode used by this block.
                    onemeta[MODE_TAG] = matcher.mode

                    self._matcherstack.append(matcher)

                else:
                    self._matcherstack.pop(-1)

# Some content.
            elif onemeta[KIND_TAG] == SPE_CONTENT_TAG:
                if not self._matcherstack:
                    raise ASTError(
                        "no block before, see line nb.{0}".format(
                            onemeta[NBLINE_TAG]
                        )
                    )

# A good content ?
                if self.match(onemeta[CONTENT_TAG], self._matcherstack[-1]):
# We have to remove escaped character ``\::``.
                    if 'value_in_line' in self._groups_found:
                        value_in_line = self._groups_found['value_in_line']

                        if value_in_line.endswith("\::"):
                            value_in_line = value_in_line[:-3] + "::"
                            self._groups_found['value_in_line'] = value_in_line

                    onemeta[CONTENT_TAG] = self._groups_found

# We can add the metadatas.
            self.add(onemeta)


    def last_block_is_container(self):
        """
prototype::
    return = bool ;
             ``True`` or ``False`` whether the last block opened is or not a
             container
        """
        if self._matcherstack:
            return self._matcherstack[-1].mode == CONTAINER

        return True


# -- STORING THE METADATAS -- #

    def add(self, metadatas):
        """
prototype::
    arg = metadatas ;
          the metadatas to store

    action = the metadatas are written in the final view ``self.view``.
        """
        self.view.write(metadatas)


    def add_partial(self, metadatas):
        """
prototype::
    arg = metadatas ;
          the metadatas to store

    action = the metadatas are written in the partial view
             ``self._partial_view``.
        """
        self._partial_view.write(metadatas)


    def next_partial_meta(self):
        for x in self._partial_view:
            yield x


    def store_one_ctxt(self, ctxtinfos, not_add_groups_alone = True):
        """
prototype::
    arg = CtxtInfos: ctxtinfos ;
          the context to store
    arg = bool: not_add_groups_alone = True ;
          ``True`` indicates to add the groups found by the last matching
          (if there are some) to the metadatas stored, whereas ``False`` asks
          to ignore them

    action = this method builds the metadatas of the context (kind, number of
             line, kind of opening or closing, groups found, content) and it
             writes them in the partial view, together with one extra
             metadata for a verbatim content found in the same line if there
             is one. An autoclosed context is stored as an opening context
             followed by a closing one.
        """
        metadatas = {
            KIND_TAG  : ctxtinfos.kind,
            NBLINE_TAG: self._nbline,
        }

        # An autoclosed context is first stored as an opening context: the
        # closing one is added at the very end of the method.
        if ctxtinfos.openclose:
            if ctxtinfos.openclose == AUTOCLOSE:
                metadatas[OPENCLOSE] = OPEN

            else:
                metadatas[OPENCLOSE] = ctxtinfos.openclose

        # A verbatim content found by the last matching is taken out of the
        # groups found so as to be stored alone.
        if ctxtinfos.verbatim:
            verbatim = self._groups_found.get(CONTENT_TAG, None)

            if verbatim is not None:
                del self._groups_found[CONTENT_TAG]

        else:
            verbatim = None

        if not_add_groups_alone and self._groups_found:
            metadatas[GRPS_FOUND_TAG] = self._groups_found

        # The verbatim content becomes a metadata of its own.
        if verbatim is not None:
            verbatim = {
                KIND_TAG   : VERB_CONTENT_TAG,
                NBLINE_TAG : self._nbline,
                CONTENT_TAG: verbatim,
            }

        # We have to keep extra indentations !
        if ctxtinfos.kind == SPE_CONTENT_TAG:
            if self._levels_stack \
            and self._levels_stack[-1] != self.INFINITY \
            and self._level != self.INFINITY:
                # Four spaces are added for each level beyond the first one
                # above the last level stored.
                if self._levels_stack \
                and self._level > self._levels_stack[-1]:
                    extra = " "*4*(self._level - self._levels_stack[-1] - 1)

                else:
                    extra = " "*self._level

            else:
                extra = ""

            metadatas[CONTENT_TAG] = "{0}{1}".format(extra, self._line)

            if self._verbatim:
                metadatas[KIND_TAG] = VERB_CONTENT_TAG

        # We must change empty lines met while ``self._verbatim`` is on to a
        # verbatim empty content.
        elif ctxtinfos.kind == EMPTYLINE_TAG and self._verbatim:
            metadatas[KIND_TAG]    = VERB_CONTENT_TAG
            metadatas[CONTENT_TAG] = ""

        # For a closing context, the two variables are swapped so as to write
        # the verbatim content, if any, before the closing metadatas. After
        # the swap ``metadatas`` can be ``None``, and ``verbatim`` always
        # contains the closing metadatas.
        if ctxtinfos.openclose == CLOSE:
            metadatas, verbatim = verbatim, metadatas

        if metadatas:
            self.add_partial(metadatas)

        if verbatim:
            self.add_partial(verbatim)

            # The verbatim flag is switched on by an opening context coming
            # with a verbatim content, and it is switched off by any closing
            # context (see the swap above).
            if ctxtinfos.openclose == OPEN:
                self._verbatim = True

            elif ctxtinfos.openclose == CLOSE:
                self._verbatim = False

        # The closing part of an autoclosed context: the same metadatas, but
        # with a closing tag.
        if ctxtinfos.openclose == AUTOCLOSE:
            new_metadatas = {k: v for k, v in metadatas.items()}
            new_metadatas[OPENCLOSE] = CLOSE
            self.add_partial(new_metadatas)


# -- MAGIC METHOD -- #

    def __iter__(self):
        for x in self.view:
            yield x
Esempio n. 7
0
    def build(self):
        # We build the AST view.
        self.ast = self.AST(mode=self.mode, content=self.content)

        self.ast.build()

        if self.ast.view.mode == "list":
            self.walk_view = IOView(self.ast.view.mode)

        else:
            self.walk_view = IOView(
                mode=self.ast.view.mode,
                path=self.ast.view.datas.with_suffix(".walk"))

        with self.walk_view:
            # -- START OF THE WALK -- #
            self.start()

            # We must keep all metadatas for fine tuning in the attribut ``self.metadata``
            # that contains all the necessary informations.
            self.datashasbeenfound = False
            self.isfirstsection = True

            self.insection = False
            self._section_title = []

            self.incomment = False
            self.indentlevel = -1

            self.last_mode = ""
            self.modes_stack = []
            self.names_stack = []

            self.nb_empty_verbline = 0

            self.kv_nbline = -1
            lastkeyval = {}

            for self.metadata in self.ast:
                # --- IMPORTANT : UGLY DEBUG --- #
                # print("--- @@@ WALK @@@ ---", self.metadata,sep="\n");continue

                kind = self.metadata[KIND_TAG]
                self.nbline = self.metadata[NBLINE_TAG]

                # -- SECTION -- #
                if kind == "section":
                    if self.metadata[OPENCLOSE] == OPEN:
                        if self.isfirstsection \
                        and self.datashasbeenfound:
                            raise PeufError(
                                "datas found before the first section")

                        self.insection = True
                        self.datashasbeenfound = True
                        self.isfirstsection = False

                        self.open_section()

                    else:
                        titlesize = len(self._section_title)

                        if titlesize == 0:
                            raise PeufError("empty title for a section")

                        elif titlesize != 1:
                            raise PeufError(
                                "title for a section not upon a single line")

                        if "\\" in self._section_title[0] \
                        or "/" in self._section_title[0]:
                            raise PeufError(
                                "title can't contain << \ >> or << / >>")

                        self.section_title(self._section_title.pop(0))

                        self.insection = False
                        self.close_section()

# -- COMMENT -- #
                elif kind.startswith("comment-"):
                    if self.metadata[OPENCLOSE] == OPEN:
                        self.incomment = True

                        # Verbatim content is verbatim !!!!
                        if self.last_mode == VERBATIM:
                            self._add_empty_verbline()

                        self.open_comment(kind[8:])

                    else:
                        self.incomment = False
                        self.close_comment(kind[8:])

# -- COMMENT LINE OR TITLE OF A SECTION -- #
                elif kind == VERB_CONTENT_TAG:
                    if self.insection:
                        self._section_title.append(self.metadata[CONTENT_TAG])

                    else:
                        self.content_in_comment(self.metadata[CONTENT_TAG])

# -- EMPTY LINE -- #
                elif kind == EMPTYLINE_TAG:
                    if self.incomment:
                        self.content_in_comment("")

                    elif self.last_mode == VERBATIM:
                        self.nb_empty_verbline += 1

# -- BLOCK -- #
                elif kind == BLOCK_TAG:
                    # An opening block
                    if self.metadata[OPENCLOSE] == OPEN:
                        self.indentlevel += 1

                        self.last_mode = self.metadata[MODE_TAG]
                        self.modes_stack.append(self.last_mode)

                        name = self.metadata[GRPS_FOUND_TAG][NAME_TAG]
                        self.names_stack.append(name)

                        self.datashasbeenfound = True
                        self.open_block(name)

                        # For block with a content, we have to augment the value of the indentation.
                        if self.last_mode != CONTAINER:
                            self.indentlevel += 1

# We have to manage key-value modes fo which a value can be written over
# several lines !
                        if self.last_mode.endswith(KEYVAL):
                            lastkeyval = {}
                            keysused = []

# A closing block
                    else:
                        if self.last_mode == VERBATIM:
                            self.nb_empty_verbline = 0

                        name = self.names_stack.pop(-1)

                        # Do we have a key-value couple ?
                        if lastkeyval:
                            self.add_keyval(lastkeyval)
                            lastkeyval = {}
                            keysused = []
                            self.indentlevel -= 1

                            # We have to take care of last comments in a block
                            self.kv_nbline = float("inf")
                            self.close_block(name)
                            self.kv_nbline = -1

                        else:
                            # Are we closing a block with a content ?
                            if self.last_mode != CONTAINER:
                                self.indentlevel -= 1

                            self.close_block(name)

                        self.indentlevel -= 1
                        self.modes_stack.pop(-1)

                        if self.modes_stack:
                            self.last_mode = self.modes_stack[-1]
                        else:
                            self.last_mode = ""

# -- MAGIC COMMENT -- #
                elif kind == MAGIC_COMMENT:
                    if self.last_mode != VERBATIM:
                        raise PeufError(
                            "magic comment not used for a verbatim content")

                    if self.metadata[OPENCLOSE] == OPEN:
                        self._add_empty_verbline()
                        self.add_magic_comment()

# -- VERBATIM CONTENT -- #   UTILE ??????
                elif self.last_mode == VERBATIM:
                    self._add_empty_verbline()
                    self.add_line(self.metadata[CONTENT_TAG][VAL_IN_LINE_TAG])

# -- KEY-VAL CONTENT -- #
                else:
                    content = self.metadata[CONTENT_TAG]

                    if VAL_IN_LINE_TAG in content:
                        if not lastkeyval:
                            raise PeufError(
                                "missing first key, see line #{0}".format(
                                    self.metadata[NBLINE_TAG]))

                        lastkeyval[VAL_TAG] \
                        += " " + content[VAL_IN_LINE_TAG].strip()
                        self.kv_nbline = self.metadata[NBLINE_TAG]

                    else:
                        if lastkeyval:
                            self.add_keyval(lastkeyval)

                        self.kv_nbline = self.metadata[NBLINE_TAG]
                        key = content[KEY_TAG]

                        if self.last_mode == KEYVAL and key in keysused:
                            raise PeufError(
                                "key already used, see line #{0}".format(
                                    self.metadata[NBLINE_TAG]))

                        keysused.append(key)

                        lastkeyval = content

# -- END OF THE WALK -- #

            self.end()

        self.builddone = True

        return self
Esempio n. 8
0
class WalkInAST():
    """
prototype::
    see = parse.ast.AST

    arg-attr = pathlib.Path, str: content ;
               see the documentation of ``parse.ast.AST``
    arg-attr = str, dict: mode ;
               see the documentation of ``parse.ast.AST``
    arg-attr = str: encoding = "utf-8" ;
               see the documentation of ``parse.ast.AST``
    arg-attr = bool: build_asts = True ;
               **this variable is only useful for a content in a file.**
               ``build_asts = True`` indicates to analyse a file and to produce
               temporary files, whereas ``build_asts = False`` asks to use
               the temporary files (this is a way to store physically the
               partial analysis)
    arg-attr = bool: remove_asts = True ;
               **this variable is only useful for a content in a file.**
               ``remove_asts = True`` indicates to remove temporary files built
               to analyze a file, whereas ``remove_asts = False`` asks to keep
               the temporary files (this is a way to store physically the
               partial analysis)

    attr = orpyste.tools.ioview.IOView: walk_view ;
           this is the attribut to use if you want to store information during
           the walk.
    attr = str: last_mode ;
           this string is the mode of the very last block opened 
    attr = list: modes_stack ;
           this stack list contains the modes of the last blocks opened 
    attr = dict: metadata ;
           this is a dictionary sent when using ``for metadata in oneast: ...``
           where ``oneast`` is an instance of ``parse.ast.AST``. This gives you
           all informations about the current piece of the AST.


warning::
    This class only implements the walking but she doesn't acheive any action.
    To do something, you have to subclass ``WalkInAST`` and to implement what
    you need in the following methods (see their documentations for more
    informations and also the class ``orpyste.data.Read`` for one real example
    of use).

        * ``start`` and ``end`` are methods called just before and after the
        walk.

        * ``open_comment`` and ``close_comment`` are called when a comment has
        to be opened or closed, whereas ``content_in_comment`` allows to add a
        content met inside a comment.

        * ``open_section`` and ``close_section`` are called when a section has
        to be opened or closed, whereas ``section_title`` is for managing the
        title of section.

        * ``open_block`` and ``close_block`` are methods called just before and
        after a block is opened or closed respectively.

        * ``add_keyval`` can add a key-separator-value data.

        * ``add_line`` allows to add a single verbatim line.
    """
    AST = AST

    def __init__(self,
                 content,
                 mode,
                 encoding="utf-8",
                 build_asts=True,
                 remove_asts=True):
        """
prototype::
    action = the arguments are simply stored in attributes with the same
             names, and the flag ``self.builddone`` is set to ``False``.
        """
        self.content = content
        self.mode = mode
        self.encoding = encoding

        self.build_asts = build_asts
        self.remove_asts = remove_asts

        # No build has been done yet.
        self.builddone = False

    def build(self):
        # We build the AST view.
        self.ast = self.AST(mode=self.mode, content=self.content)

        self.ast.build()

        if self.ast.view.mode == "list":
            self.walk_view = IOView(self.ast.view.mode)

        else:
            self.walk_view = IOView(
                mode=self.ast.view.mode,
                path=self.ast.view.datas.with_suffix(".walk"))

        with self.walk_view:
            # -- START OF THE WALK -- #
            self.start()

            # We must keep all metadatas for fine tuning in the attribut ``self.metadata``
            # that contains all the necessary informations.
            self.datashasbeenfound = False
            self.isfirstsection = True

            self.insection = False
            self._section_title = []

            self.incomment = False
            self.indentlevel = -1

            self.last_mode = ""
            self.modes_stack = []
            self.names_stack = []

            self.nb_empty_verbline = 0

            self.kv_nbline = -1
            lastkeyval = {}

            for self.metadata in self.ast:
                # --- IMPORTANT : UGLY DEBUG --- #
                # print("--- @@@ WALK @@@ ---", self.metadata,sep="\n");continue

                kind = self.metadata[KIND_TAG]
                self.nbline = self.metadata[NBLINE_TAG]

                # -- SECTION -- #
                if kind == "section":
                    if self.metadata[OPENCLOSE] == OPEN:
                        if self.isfirstsection \
                        and self.datashasbeenfound:
                            raise PeufError(
                                "datas found before the first section")

                        self.insection = True
                        self.datashasbeenfound = True
                        self.isfirstsection = False

                        self.open_section()

                    else:
                        titlesize = len(self._section_title)

                        if titlesize == 0:
                            raise PeufError("empty title for a section")

                        elif titlesize != 1:
                            raise PeufError(
                                "title for a section not upon a single line")

                        if "\\" in self._section_title[0] \
                        or "/" in self._section_title[0]:
                            raise PeufError(
                                "title can't contain << \ >> or << / >>")

                        self.section_title(self._section_title.pop(0))

                        self.insection = False
                        self.close_section()

# -- COMMENT -- #
                elif kind.startswith("comment-"):
                    if self.metadata[OPENCLOSE] == OPEN:
                        self.incomment = True

                        # Verbatim content is verbatim !!!!
                        if self.last_mode == VERBATIM:
                            self._add_empty_verbline()

                        self.open_comment(kind[8:])

                    else:
                        self.incomment = False
                        self.close_comment(kind[8:])

# -- COMMENT LINE OR TITLE OF A SECTION -- #
                elif kind == VERB_CONTENT_TAG:
                    if self.insection:
                        self._section_title.append(self.metadata[CONTENT_TAG])

                    else:
                        self.content_in_comment(self.metadata[CONTENT_TAG])

# -- EMPTY LINE -- #
                elif kind == EMPTYLINE_TAG:
                    if self.incomment:
                        self.content_in_comment("")

                    elif self.last_mode == VERBATIM:
                        self.nb_empty_verbline += 1

# -- BLOCK -- #
                elif kind == BLOCK_TAG:
                    # An opening block
                    if self.metadata[OPENCLOSE] == OPEN:
                        self.indentlevel += 1

                        self.last_mode = self.metadata[MODE_TAG]
                        self.modes_stack.append(self.last_mode)

                        name = self.metadata[GRPS_FOUND_TAG][NAME_TAG]
                        self.names_stack.append(name)

                        self.datashasbeenfound = True
                        self.open_block(name)

                        # For block with a content, we have to augment the value of the indentation.
                        if self.last_mode != CONTAINER:
                            self.indentlevel += 1

# We have to manage key-value modes fo which a value can be written over
# several lines !
                        if self.last_mode.endswith(KEYVAL):
                            lastkeyval = {}
                            keysused = []

# A closing block
                    else:
                        if self.last_mode == VERBATIM:
                            self.nb_empty_verbline = 0

                        name = self.names_stack.pop(-1)

                        # Do we have a key-value couple ?
                        if lastkeyval:
                            self.add_keyval(lastkeyval)
                            lastkeyval = {}
                            keysused = []
                            self.indentlevel -= 1

                            # We have to take care of last comments in a block
                            self.kv_nbline = float("inf")
                            self.close_block(name)
                            self.kv_nbline = -1

                        else:
                            # Are we closing a block with a content ?
                            if self.last_mode != CONTAINER:
                                self.indentlevel -= 1

                            self.close_block(name)

                        self.indentlevel -= 1
                        self.modes_stack.pop(-1)

                        if self.modes_stack:
                            self.last_mode = self.modes_stack[-1]
                        else:
                            self.last_mode = ""

# -- MAGIC COMMENT -- #
                elif kind == MAGIC_COMMENT:
                    if self.last_mode != VERBATIM:
                        raise PeufError(
                            "magic comment not used for a verbatim content")

                    if self.metadata[OPENCLOSE] == OPEN:
                        self._add_empty_verbline()
                        self.add_magic_comment()

# -- VERBATIM CONTENT -- #   USEFUL ??????
                elif self.last_mode == VERBATIM:
                    self._add_empty_verbline()
                    self.add_line(self.metadata[CONTENT_TAG][VAL_IN_LINE_TAG])

# -- KEY-VAL CONTENT -- #
                else:
                    content = self.metadata[CONTENT_TAG]

                    if VAL_IN_LINE_TAG in content:
                        if not lastkeyval:
                            raise PeufError(
                                "missing first key, see line #{0}".format(
                                    self.metadata[NBLINE_TAG]))

                        lastkeyval[VAL_TAG] \
                        += " " + content[VAL_IN_LINE_TAG].strip()
                        self.kv_nbline = self.metadata[NBLINE_TAG]

                    else:
                        if lastkeyval:
                            self.add_keyval(lastkeyval)

                        self.kv_nbline = self.metadata[NBLINE_TAG]
                        key = content[KEY_TAG]

                        if self.last_mode == KEYVAL and key in keysused:
                            raise PeufError(
                                "key already used, see line #{0}".format(
                                    self.metadata[NBLINE_TAG]))

                        keysused.append(key)

                        lastkeyval = content

# -- END OF THE WALK -- #

            self.end()

        self.builddone = True

        return self

# -- START AND END OF THE WALK -- #

    def start(self):
        """
This method is called just before the walk starts.
        """
        ...

    def end(self):
        """
This method is called just after the end of the walk.
        """
        ...

    def remove_extras(self):
        self.ast.view.remove()
        self.walk_view.remove()

# -- COMMENTS -- #

    def open_comment(self, kind):
        """
prototype::
    arg = str: kind in ["singleline", "multilines", "multilines-singleline"] ;
          ``kind = "singleline"`` is for orpyste::``// ...``,
          ``kind = "multilines"`` is for orpyste::``/* ... */`` where the
          content contains at least one back return, and
          ``kind = "multilines-singleline"`` is for orpyste::``/* ... */``
          which is all in a single line


This method is for opening a comment. No content is given there (see the method
``content_in_comment``).
        """
        ...

    def close_comment(self, kind):
        """
prototype::
    arg = str: kind in ["singleline", "multilines", "multilines-singleline"] ;
          ``kind = "singleline"`` is for orpyste::``// ...``,
          ``kind = "multilines"`` is for orpyste::``/* ... */`` where the
          content contains at least one back return, and
          ``kind = "multilines-singleline"`` is for orpyste::``/* ... */``
          which is all in a single line


This method is for closing a comment. No content is given there (see the method
``content_in_comment``).
        """
        ...

    def content_in_comment(self, line):
        """
prototype::
    arg = str: line


This method is for adding content inside a comment (see the methods
``open_comment`` and ``close_comment``).
        """
        ...

# -- SECTIONS -- #

    def open_section(self):
        """
This method is for opening a section.
        """
        ...

    def close_section(self):
        """
This method is for closing a section.
        """
        ...

    def section_title(self, title):
        """
This method manages the title of a section.
        """
        ...

# -- BLOCKS -- #

    def open_block(self, name):
        """
prototype::
    arg = str: name


This method is for opening a new block knowning its name.
        """
        ...

    def close_block(self, name):
        """
This method is for closing a block knowning its name.
        """
        ...

# -- (MULTI)KEYVAL -- #

    def add_keyval(self, keyval):
        """
prototype::
    arg = {"key": str, "sep": str, "value": str}: keyval


This method is for adding a new key with its associated value and separator.
All this informations are in the dictionary ``keyval``.
        """
        ...

# -- VERBATIM -- #

# We have to take care of the last empty lines !!!

    def _add_empty_verbline(self):
        if self.nb_empty_verbline:
            self.nbline -= self.nb_empty_verbline + 1

            for _ in range(self.nb_empty_verbline):
                self.nbline += 1
                self.add_line("")

            self.nbline += 1
            self.nb_empty_verbline = 0

    def add_line(self, line):
        """
prototype::
    arg = str: line


This method is for adding verbatim content.
        """
        ...

    def add_magic_comment(self):
        """
This method is for adding the magic comment used for empty lines at the end of
verbatim contents.
        """
        ...

# -- CONTEXT MANAGER -- #

    def __enter__(self):
        # We have to always build asts if the content is a string !
        if self.build_asts \
        or not isinstance(self.content, str):
            self.build()

        return self

    def __exit__(self, type, value, traceback):
        if self.remove_asts:
            self.remove_extras()