Example #1
0
    def tokenize(self, text):
        """Split ``text`` into a list of alternating word/whitespace tokens.

        A token is either a maximal run of whitespace characters or a
        maximal run of non-whitespace characters.  A ``#`` starts a
        comment: the remainder of the line becomes one final token.
        Quote characters are appended to the current token directly,
        bypassing the usual space/word classification.

        Returns the list of tokens (``[""]`` for empty input).
        """
        # Raw string avoids the invalid-escape warning for "\s";
        # compiling once hoists the pattern lookup out of the loop.
        whitespace = re.compile(r"\s")

        i = 0          # current position in text
        t = 0          # index of the token currently being built
        tokens = [""]
        inString = 0   # set when the previous character opened a quote

        while i < len(text):

            # Append an opening quote verbatim and skip classification.
            # NOTE(review): inString is reset on the first non-quote
            # character, so this does not track whole string literals —
            # confirm that only the quote char itself needs special-casing.
            if inString == 0 and (text[i] == '"' or text[i] == "'"):
                inString = 1
                tokens[t] += text[i]
                i += 1
                continue
            elif inString == 1 and text[i] != '"' and text[i] != "'":
                inString = 0

            # Comment: everything from '#' to end of line is one token,
            # either filling the still-empty current token or appended new.
            if text[i] == "#":
                if len(tokens[t]) == 0:
                    tokens[t] += text[i:]
                else:
                    tokens.append(text[i:])
                break

            space = whitespace.match(text[i])
            # An empty current token counts as "space" so that the first
            # character can seed it either way.
            curTokenSpace = whitespace.match(tokens[t]) or len(tokens[t]) == 0

            if space:
                if curTokenSpace:
                    # Extend the current whitespace run.
                    tokens[t] += text[i]
                else:
                    # Word -> space boundary: start a new token.
                    tokens.append(text[i])
                    t += 1
            elif curTokenSpace:
                if len(tokens[t]) == 0:
                    # First character of the very first token.
                    tokens[t] += text[i]
                else:
                    # Space -> word boundary: start a new token.
                    tokens.append(text[i])
                    t += 1
            else:
                # Extend the current word.
                tokens[t] += text[i]

            i += 1

        return tokens
Example #2
0
    def annotateLine(self, lineNumber, text):
        """Annotate one source line as a list of AnnotatedFragment objects.

        ``lineNumber`` is accepted for interface compatibility but is not
        used by this implementation.

        A line that contains only a comment (optional leading whitespace,
        then ``#``) becomes a single "comment" fragment.  Otherwise the
        line is tokenized and each token is tagged with the applicable
        classes ("keyword" and/or "comment", space-separated).
        """
        # Early exit for fully commented lines (raw string for the regex).
        if re.match(r"^\s*#", text):
            return [AnnotatedFragment(text, "comment")]

        # Other lines have to be tokenized first.
        tokens = self.tokenize(text)

        fragments = []

        for token in tokens:
            classes = []

            # `in` replaces the Python-2-only dict.has_key().
            if token in self.keywordMap:
                classes.append("keyword")

            # Trailing comment tokens start with '#' (see tokenize()).
            if token.startswith("#"):
                classes.append("comment")

            fragments.append(AnnotatedFragment(token, " ".join(classes)))

        return fragments
Example #3
0
 def isSpace(str):
     """Return a truthy re.Match if ``str`` starts with whitespace, else None.

     The parameter shadows the builtin ``str``; the name is kept so
     existing keyword-argument callers are unaffected.
     """
     # Raw string avoids the invalid-escape SyntaxWarning for "\s".
     return re.match(r"\s", str)