Python extract Examples

Programming Language: Python

Namespace/Package Name: translate.misc.quote

Method/Function: extract

Examples at hotexamples.com: 4

Python extract - 4 examples found. These are the top-rated real-world Python examples of translate.misc.quote.extract, taken from open source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: test_quote.py Project: nijel/translate

def test_extract():
    """Run quote.extract over a table of (arguments, expected result) pairs."""
    cases = [
        # plain delimiters, starting outside the quoted text
        (("the <quoted> part", "<", ">", "\\", 0), ("<quoted>", False)),
        (("the 'quoted' part", "'", "'", "\\", 0), ("'quoted'", False)),
        # an escaped end delimiter does not close the quote
        (
            ("the 'isn\\'t escaping fun' part", "'", "'", "\\", 0),
            ("'isn\\'t escaping fun'", False),
        ),
        # quote left open at the end of the input
        (
            ("the 'isn\\'t something ", "'", "'", "\\", 0),
            ("'isn\\'t something ", True),
        ),
        # escape characters outside of the quoted sections
        (("<quoted>\\", "<", ">", "\\", 0), ("<quoted>", False)),
        (("<quoted><again>", "<", ">", "\\", 0), ("<quoted><again>", False)),
        (("<quoted>\\\\<again>", "<", ">", "\\", 0), ("<quoted><again>", False)),
        (("<quoted\\>", "<", ">", "\\", 0), ("<quoted\\>", True)),
        # no escape character, starting already inside the quoted text
        (
            (' -->\n<!ENTITY blah "Some">', "<!--", "-->", None, 1),
            (" -->", False),
        ),
        (('">\n', '"', '"', None, True), ('"', False)),
    ]
    for arguments, expected in cases:
        assert quote.extract(*arguments) == expected

Example #2

Show file

File: test_quote.py Project: onia/translate

def test_extract():
    """Exercise quote.extract with plain, escaped, unterminated and pre-opened quotes."""
    extract = quote.extract

    # ordinary delimiters, starting outside the quoted text
    found = extract("the <quoted> part", "<", ">", "\\", 0)
    assert found == ("<quoted>", False)
    found = extract("the 'quoted' part", "'", "'", "\\", 0)
    assert found == ("'quoted'", False)

    # escaped end delimiters inside the quoted text
    found = extract("the 'isn\\'t escaping fun' part", "'", "'", "\\", 0)
    assert found == ("'isn\\'t escaping fun'", False)
    found = extract("the 'isn\\'t something ", "'", "'", "\\", 0)
    assert found == ("'isn\\'t something ", True)

    # escape characters around and between quoted sections
    found = extract("<quoted>\\", "<", ">", "\\", 0)
    assert found == ("<quoted>", False)
    found = extract("<quoted><again>", "<", ">", "\\", 0)
    assert found == ("<quoted><again>", False)
    found = extract("<quoted>\\\\<again>", "<", ">", "\\", 0)
    assert found == ("<quoted><again>", False)
    found = extract("<quoted\\>", "<", ">", "\\", 0)
    assert found == ("<quoted\\>", True)

    # no escape character, starting already inside the quoted text
    found = extract(' -->\n<!ENTITY blah "Some">', "<!--", "-->", None, 1)
    assert found == (" -->", False)
    found = extract('">\n', '"', '"', None, True)
    assert found == ('"', False)

Example #3

Show file

File: dtd.py Project: pombredanne/verbatim

    def parse(self, dtdsrc):
        """Read the first DTD element from ``dtdsrc`` into this object.

        The source is split on newlines and consumed one line at a time.
        Comments are collected as ``(type, text)`` tuples in
        ``self.comments`` and the first ``<!ENTITY`` declaration found fills
        in ``self.entity``, ``self.definition`` and the related layout
        attributes (``space_pre_entity``, ``space_pre_definition``,
        ``closing``, ...).  Processing stops as soon as the quoted definition
        of that entity has been closed.

        ``self.incomment`` and ``self.inentity`` are read, and
        ``self.unparsedlines`` is appended to, without being initialised in
        this method, so they must already exist on the instance.

        Returns the number of lines consumed from ``dtdsrc`` (0 when
        ``dtdsrc`` is empty).
        """
        self.comments = []
        # make all the lists the same: every comment category is an alias of
        # self.comments, so all notes end up in one list in source order
        self._locfilenotes = self.comments
        self._locgroupstarts = self.comments
        self._locgroupends = self.comments
        self._locnotes = self.comments
        # self._locfilenotes = []
        # self._locgroupstarts = []
        # self._locgroupends = []
        # self._locnotes = []
        # self.comments = []
        self.entity = None
        self.definition = ''
        if not dtdsrc:
            return 0
        lines = dtdsrc.split("\n")
        linesprocessed = 0
        comment = ""
        for line in lines:
            # split() dropped the newline; put it back so every line is terminated
            line += "\n"
            linesprocessed += 1
            # print "line(%d,%d): " % (self.incomment,self.inentity),line[:-1]
            if not self.incomment:
                if (line.find('<!--') != -1):
                    self.incomment = True
                    self.continuecomment = False
                    # now work out the type of comment, and save it (remember we're not in the comment yet)
                    (comment, dummy) = quote.extract(line, "<!--", "-->", None,
                                                     0)
                    if comment.find('LOCALIZATION NOTE') != -1:
                        # presumably findend returns the index just past the marker - verify in quote
                        l = quote.findend(comment, 'LOCALIZATION NOTE')
                        # skip the spaces between the marker and the keyword that follows it
                        while (comment[l] == ' '):
                            l += 1
                        if comment.find('FILE', l) == l:
                            self.commenttype = "locfile"
                        elif comment.find('BEGIN', l) == l:
                            self.commenttype = "locgroupstart"
                        elif comment.find('END', l) == l:
                            self.commenttype = "locgroupend"
                        else:
                            self.commenttype = "locnote"
                    else:
                        # plain comment
                        self.commenttype = "comment"
                # FIXME: the entity might share a line with something important
                elif not self.inentity and re.search("%.*;", line):
                    # outside an entity, a line with a '%' followed later by a ';'
                    # is stored whole as a plain comment and not parsed further
                    self.comments.append(("comment", line))
                    line = ""
                    continue

            if self.incomment:
                # some kind of comment; the second value returned by extract says
                # whether the comment is still open at the end of this line
                (comment,
                 self.incomment) = quote.extract(line, "<!--", "-->", None,
                                                 self.continuecomment)
                # print "comment(%d,%d): " % (self.incomment,self.continuecomment),comment
                self.continuecomment = self.incomment
                # strip the comment out of what will be parsed
                line = line.replace(comment, "", 1)
                # add an end of line if this is the end of the comment
                if not self.incomment:
                    if line.isspace():
                        # only whitespace remains: keep it with the comment
                        comment += line
                        line = ''
                    else:
                        comment += '\n'
                # check if there's actually an entity definition that's commented out
                # TODO: parse these, store as obsolete messages
                # if comment.find('<!ENTITY') != -1:
                #     # remove the entity from the comment
                #     comment, dummy = quote.extractwithoutquotes(comment, ">", "<!ENTITY", None, 1)
                # depending on the type of comment (worked out at the start), put it in the right place
                # make it record the comment and type as a tuple
                commentpair = (self.commenttype, comment)
                if self.commenttype == "locfile":
                    self._locfilenotes.append(commentpair)
                elif self.commenttype == "locgroupstart":
                    self._locgroupstarts.append(commentpair)
                elif self.commenttype == "locgroupend":
                    self._locgroupends.append(commentpair)
                elif self.commenttype == "locnote":
                    self._locnotes.append(commentpair)
                elif self.commenttype == "comment":
                    self.comments.append(commentpair)

            if not self.inentity and not self.incomment:
                # look for the start of an entity declaration on this line
                entitypos = line.find('<!ENTITY')
                if entitypos != -1:
                    self.inentity = True
                    # text before the declaration is only kept if it starts with '#'
                    beforeentity = line[:entitypos].strip()
                    if beforeentity.startswith("#"):
                        self.hashprefix = beforeentity
                    self.entitypart = "start"
                else:
                    self.unparsedlines.append(line)

            if self.inentity:
                # self.entitypart walks through "start" -> "name" ->
                # ("parameter" ->) "definition"; several stages may be handled
                # on the same line because each 'if' below falls through
                if self.entitypart == "start":
                    # the entity definition
                    e = quote.findend(line, '<!ENTITY')
                    line = line[e:]
                    self.entitypart = "name"
                    self.entitytype = "internal"
                if self.entitypart == "name":
                    s = 0
                    e = 0
                    while (e < len(line) and line[e].isspace()):
                        e += 1
                    # whitespace before the name is recorded as that many spaces
                    self.space_pre_entity = ' ' * (e - s)
                    s = e
                    self.entity = ''
                    # a '%' in front of the name marks the entity as "external"
                    if (e < len(line) and line[e] == '%'):
                        self.entitytype = "external"
                        self.entityparameter = ""
                        e += 1
                        while (e < len(line) and line[e].isspace()):
                            e += 1
                    # the name runs up to the next whitespace character
                    while (e < len(line) and not line[e].isspace()):
                        self.entity += line[e]
                        e += 1
                    s = e

                    assert quote.rstripeol(self.entity) == self.entity
                    while (e < len(line) and line[e].isspace()):
                        e += 1
                    self.space_pre_definition = ' ' * (e - s)
                    if self.entity:
                        if self.entitytype == "external":
                            self.entitypart = "parameter"
                        else:
                            self.entitypart = "definition"
                        # remember the start position and the quote character
                        if e == len(line):
                            # nothing after the name on this line: carry on with the next one
                            self.entityhelp = None
                            e = 0
                            continue
                        elif self.entitypart == "definition":
                            self.entityhelp = (e, line[e])
                            self.instring = False
                if self.entitypart == "parameter":
                    while (e < len(line) and line[e].isspace()):
                        e += 1
                    paramstart = e
                    # collect one alphanumeric word of the parameter
                    while (e < len(line) and line[e].isalnum()):
                        e += 1
                    self.entityparameter += line[paramstart:e]
                    while (e < len(line) and line[e].isspace()):
                        e += 1
                    # drop what has been consumed so offsets restart at 0
                    line = line[e:]
                    e = 0
                    if not line:
                        continue
                    # a quote character right after the parameter starts the definition
                    if line[0] in ('"', "'"):
                        self.entitypart = "definition"
                        self.entityhelp = (e, line[e])
                        self.instring = False
                if self.entitypart == "definition":
                    if self.entityhelp is None:
                        # the definition starts on a later line than the name:
                        # find its first non-whitespace character
                        e = 0
                        while (e < len(line) and line[e].isspace()):
                            e += 1
                        if e == len(line):
                            continue
                        self.entityhelp = (e, line[e])
                        self.instring = False
                    # actually the lines below should remember instring, rather than using it as dummy
                    # entityhelp is (offset of the definition in this line, quote character)
                    e = self.entityhelp[0]
                    if (self.entityhelp[1] == "'"):
                        (defpart, self.instring) = quote.extract(
                            line[e:],
                            "'",
                            "'",
                            startinstring=self.instring,
                            allowreentry=False)
                    elif (self.entityhelp[1] == '"'):
                        (defpart, self.instring) = quote.extract(
                            line[e:],
                            '"',
                            '"',
                            startinstring=self.instring,
                            allowreentry=False)
                    else:
                        raise ValueError("Unexpected quote character... %r" %
                                         (self.entityhelp[1]))
                    # for any following lines, start at the beginning of the line. remember the quote character
                    self.entityhelp = (0, self.entityhelp[1])
                    self.definition += defpart
                    if not self.instring:
                        # definition closed: keep the rest of the line (minus
                        # line endings) and stop after this first element
                        self.closing = line[e + len(defpart):].rstrip("\n\r")
                        self.inentity = False
                        break

        # debugging aid, disabled by the constant condition: change 0 to 1 to
        # record every attribute of the object as a comment
        if 0:
            for attr in dir(self):
                r = repr(getattr(self, attr))
                if len(r) > 60:
                    r = r[:57] + "..."
                self.comments.append(("comment", "self.%s = %s" % (attr, r)))
        return linesprocessed

Example #4

Show file

File: dtd.py Project: anukat2015/translate

    def parse(self, dtdsrc):
        """Read the first DTD element from ``dtdsrc`` into this object.

        The source is split on newlines and consumed one line at a time.
        Comments are collected as ``(type, text)`` tuples in
        ``self.comments`` and the first ``<!ENTITY`` declaration found fills
        in ``self.entity``, ``self.definition`` and the related layout
        attributes (``space_pre_entity``, ``space_pre_definition``,
        ``closing``, ...).  Processing stops as soon as the quoted definition
        of that entity has been closed.

        ``self.incomment`` and ``self.inentity`` are read, and
        ``self.unparsedlines`` is appended to, without being initialised in
        this method, so they must already exist on the instance.

        Returns the number of lines consumed from ``dtdsrc`` (0 when
        ``dtdsrc`` is empty).
        """
        self.comments = []
        # make all the lists the same: every comment category is an alias of
        # self.comments, so all notes end up in one list in source order
        self._locfilenotes = self.comments
        self._locgroupstarts = self.comments
        self._locgroupends = self.comments
        self._locnotes = self.comments
        # self._locfilenotes = []
        # self._locgroupstarts = []
        # self._locgroupends = []
        # self._locnotes = []
        # self.comments = []
        self.entity = None
        self.definition = ''
        if not dtdsrc:
            return 0
        lines = dtdsrc.split("\n")
        linesprocessed = 0
        comment = ""
        for line in lines:
            # split() dropped the newline; put it back so every line is terminated
            line += "\n"
            linesprocessed += 1
            if not self.incomment:
                if (line.find('<!--') != -1):
                    self.incomment = True
                    self.continuecomment = False
                    # now work out the type of comment, and save it (remember we're not in the comment yet)
                    (comment, dummy) = quote.extract(line, "<!--", "-->", None, 0)
                    if comment.find('LOCALIZATION NOTE') != -1:
                        # presumably findend returns the index just past the marker - verify in quote
                        l = quote.findend(comment, 'LOCALIZATION NOTE')
                        # skip the spaces between the marker and the keyword that follows it
                        while (comment[l] == ' '):
                            l += 1
                        if comment.find('FILE', l) == l:
                            self.commenttype = "locfile"
                        elif comment.find('BEGIN', l) == l:
                            self.commenttype = "locgroupstart"
                        elif comment.find('END', l) == l:
                            self.commenttype = "locgroupend"
                        else:
                            self.commenttype = "locnote"
                    else:
                        # plain comment
                        self.commenttype = "comment"
                # FIXME: the entity might share a line with something important
                elif not self.inentity and re.search("%.*;", line):
                    # outside an entity, a line with a '%' followed later by a ';'
                    # is stored whole as a plain comment and not parsed further
                    self.comments.append(("comment", line))
                    line = ""
                    continue

            if self.incomment:
                # some kind of comment; the second value returned by extract says
                # whether the comment is still open at the end of this line
                (comment, self.incomment) = quote.extract(line, "<!--", "-->", None, self.continuecomment)
                self.continuecomment = self.incomment
                # strip the comment out of what will be parsed
                line = line.replace(comment, "", 1)
                # add an end of line if this is the end of the comment
                if not self.incomment:
                    if line.isspace():
                        # only whitespace remains: keep it with the comment
                        comment += line
                        line = ''
                    else:
                        comment += '\n'
                # check if there's actually an entity definition that's commented out
                # TODO: parse these, store as obsolete messages
                # if comment.find('<!ENTITY') != -1:
                #     # remove the entity from the comment
                #     comment, dummy = quote.extractwithoutquotes(comment, ">", "<!ENTITY", None, 1)
                # depending on the type of comment (worked out at the start), put it in the right place
                # make it record the comment and type as a tuple
                commentpair = (self.commenttype, comment)
                if self.commenttype == "locfile":
                    self._locfilenotes.append(commentpair)
                elif self.commenttype == "locgroupstart":
                    self._locgroupstarts.append(commentpair)
                elif self.commenttype == "locgroupend":
                    self._locgroupends.append(commentpair)
                elif self.commenttype == "locnote":
                    self._locnotes.append(commentpair)
                elif self.commenttype == "comment":
                    self.comments.append(commentpair)

            if not self.inentity and not self.incomment:
                # look for the start of an entity declaration on this line
                entitypos = line.find('<!ENTITY')
                if entitypos != -1:
                    self.inentity = True
                    # text before the declaration is only kept if it starts with '#'
                    beforeentity = line[:entitypos].strip()
                    if beforeentity.startswith("#"):
                        self.hashprefix = beforeentity
                    self.entitypart = "start"
                else:
                    self.unparsedlines.append(line)

            if self.inentity:
                # self.entitypart walks through "start" -> "name" ->
                # ("parameter" ->) "definition"; several stages may be handled
                # on the same line because each 'if' below falls through
                if self.entitypart == "start":
                    # the entity definition
                    e = quote.findend(line, '<!ENTITY')
                    line = line[e:]
                    self.entitypart = "name"
                    self.entitytype = "internal"
                if self.entitypart == "name":
                    s = 0
                    e = 0
                    while (e < len(line) and line[e].isspace()):
                        e += 1
                    # whitespace before the name is recorded as that many spaces
                    self.space_pre_entity = ' ' * (e - s)
                    s = e
                    self.entity = ''
                    # a '%' in front of the name marks the entity as "external"
                    if (e < len(line) and line[e] == '%'):
                        self.entitytype = "external"
                        self.entityparameter = ""
                        e += 1
                        while (e < len(line) and line[e].isspace()):
                            e += 1
                    # the name runs up to the next whitespace character
                    while (e < len(line) and not line[e].isspace()):
                        self.entity += line[e]
                        e += 1
                    s = e

                    assert quote.rstripeol(self.entity) == self.entity
                    while (e < len(line) and line[e].isspace()):
                        e += 1
                    self.space_pre_definition = ' ' * (e - s)
                    if self.entity:
                        if self.entitytype == "external":
                            self.entitypart = "parameter"
                        else:
                            self.entitypart = "definition"
                        # remember the start position and the quote character
                        if e == len(line):
                            # nothing after the name on this line: carry on with the next one
                            self.entityhelp = None
                            e = 0
                            continue
                        elif self.entitypart == "definition":
                            self.entityhelp = (e, line[e])
                            self.instring = False
                if self.entitypart == "parameter":
                    while (e < len(line) and line[e].isspace()):
                        e += 1
                    paramstart = e
                    # collect one alphanumeric word of the parameter
                    while (e < len(line) and line[e].isalnum()):
                        e += 1
                    self.entityparameter += line[paramstart:e]
                    while (e < len(line) and line[e].isspace()):
                        e += 1
                    # drop what has been consumed so offsets restart at 0
                    line = line[e:]
                    e = 0
                    if not line:
                        continue
                    # a quote character right after the parameter starts the definition
                    if line[0] in ('"', "'"):
                        self.entitypart = "definition"
                        self.entityhelp = (e, line[e])
                        self.instring = False
                if self.entitypart == "definition":
                    if self.entityhelp is None:
                        # the definition starts on a later line than the name:
                        # find its first non-whitespace character
                        e = 0
                        while (e < len(line) and line[e].isspace()):
                            e += 1
                        if e == len(line):
                            continue
                        self.entityhelp = (e, line[e])
                        self.instring = False
                    # actually the lines below should remember instring, rather than using it as dummy
                    # entityhelp is (offset of the definition in this line, quote character)
                    e = self.entityhelp[0]
                    if (self.entityhelp[1] == "'"):
                        (defpart, self.instring) = quote.extract(line[e:], "'", "'", startinstring=self.instring, allowreentry=False)
                    elif (self.entityhelp[1] == '"'):
                        (defpart, self.instring) = quote.extract(line[e:], '"', '"', startinstring=self.instring, allowreentry=False)
                    else:
                        raise ValueError("Unexpected quote character... %r" % (self.entityhelp[1]))
                    # for any following lines, start at the beginning of the line. remember the quote character
                    self.entityhelp = (0, self.entityhelp[1])
                    self.definition += defpart
                    if not self.instring:
                        # definition closed: keep the rest of the line (minus
                        # line endings) and stop after this first element
                        self.closing = line[e+len(defpart):].rstrip("\n\r")
                        self.inentity = False
                        break

        # debugging aid, disabled by the constant condition: change 0 to 1 to
        # record every attribute of the object as a comment
        if 0:
            for attr in dir(self):
                r = repr(getattr(self, attr))
                if len(r) > 60:
                    r = r[:57] + "..."
                self.comments.append(("comment", "self.%s = %s" % (attr, r)))
        return linesprocessed