Python tokenize 예제들, explainshell.options.tokenize Python 예제들

예제 #1

0

파일 보기

    def test_tokenize_equals(self):
        '''tokenize strings that contain '=' with varying whitespace around it

        in the expectations below, '=' never stays glued to the text before
        it: it either starts a token together with the text that directly
        follows it ('=c') or is a token by itself when followed by a space.
        '''
        # (input, (start, end, text) of each expected token, 4th argument
        # for assertTokensEquals)
        cases = (
            ('a b=c',
             ((0, 1, 'a'), (2, 3, 'b'), (3, 5, '=c')),
             ('a', 'b', '=c')),
            ('a b =c',
             ((0, 1, 'a'), (2, 3, 'b'), (4, 6, '=c')),
             ('a', 'b', '=c')),
            ('a b= c',
             ((0, 1, 'a'), (2, 3, 'b'), (3, 4, '='), (5, 6, 'c')),
             ('a', 'b', '=', 'c')),
            ('a b = c',
             ((0, 1, 'a'), (2, 3, 'b'), (4, 5, '='), (6, 7, 'c')),
             ('a', 'b', '=', 'c')),
            ('a b  = c',
             ((0, 1, 'a'), (2, 3, 'b'), (5, 6, '='), (7, 8, 'c')),
             ('a', 'b', '=', 'c')),
        )

        for source, fields, raw in cases:
            tokens = list(options.tokenize(source))
            wanted = [ts(*f) for f in fields]
            self.assertTokensEquals(source, tokens, wanted, raw)

예제 #2

0

파일 보기

    def test_parsing_error(self):
        '''tokenize raises errors.ParsingError on malformed input

        covers a trailing backslash with nothing to escape and a double quote
        that is never closed; the error message is matched against a regex
        that includes the reported position.
        '''
        # (input, regex the ParsingError message has to match)
        malformed = (
            ('no escaped character\\',
             r'No escaped character \(position 21, ...cter\\\)'),
            ('no closing "quotation',
             r'No closing quotation \(position 21, ...ation\)'),
        )

        for source, pattern in malformed:
            with self.assertRaisesRegexp(errors.ParsingError, pattern):
                # tokenize is consumed through list(), as in every other test
                list(options.tokenize(source))

예제 #3

0

파일 보기

    def test_tokenize(self):
        '''tokenize plain words, runs of spaces and single-quoted words

        for a quoted word the expected token text has no quotes, while its
        start/end positions cover the quotes as well (e.g. (2, 5, 'b') for
        the 'b' in "a 'b' c").
        '''
        # (input, (start, end, text) of each expected token, 4th argument
        # for assertTokensEquals)
        cases = (
            ('bar -x',
             ((0, 3, 'bar'), (4, 6, '-x')),
             ('bar', '-x')),
            ('wx    y =z ',
             ((0, 2, 'wx'), (6, 7, 'y'), (8, 10, '=z')),
             ('wx', 'y', '=z')),
            ("a 'b' c",
             ((0, 1, 'a'), (2, 5, 'b'), (6, 7, 'c')),
             ('a', "'b'", 'c')),
            ("a 'b  ' c",
             ((0, 1, 'a'), (2, 7, 'b  '), (8, 9, 'c')),
             ('a', "'b  '", 'c')),
        )

        for source, fields, raw in cases:
            tokens = list(options.tokenize(source))
            wanted = [ts(*f) for f in fields]
            self.assertTokensEquals(source, tokens, wanted, raw)

예제 #4

0

파일 보기

파일: matcher.py 프로젝트: theicfire/explainshell

    def match(self):
        '''parse s and return a list of matchresult

        match works as follows:
        - tokenize the string using options.tokenize
        - look up a man page that matches the first token
        - check if the man page has 'subcommands', e.g. git commit, if so
          try to find a man page for the first two tokens
        - iterate all tokens
          - search the token as is
          - if the token is a short option (-abc) try to look up each option
            individually
          - if the previous match expected an arg, merge this token with
            the previous one
          - partialmatch: if this man page allows options to start without '-',
            try to match all characters individually (e.g. tar xzvf)
          - check if this man page has any positional arguments
          - mark this token as unknown

        after we finish processing all tokens, we:
        - merge unknown consecutive matches to become one matchresult
        - merge adjacent matchresults that have the same help text (e.g. if we
          had -vvv)

        returns a list of tuples: the first one is (self.manpage.name, matches)
        where matches is the list of matchresult for self.s, followed by
        (manpage, None) for every other man page findmanpages returned.

        the matchresult of the command itself spans the positions reported by
        nexttoken rather than (0, len(token)): the text of a token can be
        shorter than the source it was read from (a quoted command, or more
        than one space between the two words of a multicommand).

        NOTE(review): an unknown first token presumably makes findmanpages
        raise errors.ProgramDoesNotExist, which is not caught here - confirm.
        '''
        logger.info('matching string %r', self.s)
        self.tokens = util.peekable(options.tokenize(self.s))
        #logger.info('tokenized %r to %s', self.s, [t[1] for t in self.tokens])
        self.ts = None

        def attempt_fuzzy(chars):
            '''look up every character of chars as an option of its own

            returns one matchresult per piece (an unknown one for pieces
            without an option), or a single matchresult covering all of chars
            if chars starts with '-' and its first option expects an arg.
            self.pos is moved along while matching and restored on the normal
            return path.
            '''
            m = []
            if chars[0] == '-':
                # '-abc' -> ['-a', 'b', 'c']
                tokens = [chars[0:2]] + list(chars[2:])
                considerarg = True
            else:
                tokens = list(chars)
                considerarg = False

            oldp = self.pos
            for t in tokens:
                # every piece is looked up with a leading dash
                op = t if t[0] == '-' else '-' + t
                option = self.find_option(op)
                if option:
                    # only the first piece may swallow the rest as its arg
                    if considerarg and not m and option.expectsarg:
                        logger.info(
                            'option %r expected an arg, taking the rest too',
                            option)
                        return [
                            matchresult(self.pos, self.pos + len(chars),
                                        option.text, None)
                        ]

                    mr = matchresult(self.pos, self.pos + len(t), option.text,
                                     None)
                    m.append(mr)
                else:
                    m.append(self.unknown(t, self.pos + len(t)))
                self.pos += len(t)
            self.pos = oldp
            return m

        # self.pos/endpos delimit the command inside self.s
        self.pos, endpos, token = self.nexttoken()
        mps = self.findmanpages(token)
        self.manpage = mps[0]
        if self.manpage.multicommand and self.tokens.hasnext():
            try:
                multi = '%s %s' % (token, self.tokens.peek()[2])
                logger.info(
                    '%r is a multicommand, trying to get another token and look up %r',
                    self.manpage, multi)
                mps = self.findmanpages(multi)
                self.manpage = mps[0]
                # consume the second word, the command now ends where it ends
                _, endpos, _ = self.nexttoken()
                token = multi
            except errors.ProgramDoesNotExist:
                logger.info('no manpage %r for multicommand %r', multi,
                            self.manpage)

        matches = []
        matches.append(
            matchresult(self.pos, endpos, self.manpage.synopsis, None))

        while self.tokens.hasnext():
            self.pos, endpos, token = self.nexttoken()
            logger.info('trying to match token: %r', token)

            # remember the option seen before this token, it may expect an arg
            self._prevoption = self._currentoption
            option = self.find_option(token)
            if option:
                logger.info('found an exact match for %r: %r', token, option)
                mr = matchresult(self.pos, self.ts.endpos, option.text, None)
                matches.append(mr)
            else:
                if token != '-' and token.startswith(
                        '-') and not token.startswith('--'):
                    logger.debug('looks like a short option')
                    if len(token) > 2:
                        logger.info("trying to split it up")
                        matches.extend(attempt_fuzzy(token))
                        self.pos += len(token)
                    else:
                        matches.append(self.unknown(token))
                elif self._prevoption and self._prevoption.expectsarg:
                    logger.info(
                        "previous option possibly expected an arg, and we can't"
                        " find an option to match the current token, assuming it's an arg"
                    )
                    # expectsarg is either just truthy or a list of the
                    # values that are accepted as the arg
                    ea = self._prevoption.expectsarg
                    possibleargs = ea if isinstance(ea, list) else []
                    take = True
                    if possibleargs and token not in possibleargs:
                        take = False
                        logger.info(
                            'token %r not in list of possible args %r for %r',
                            token, possibleargs, self._prevoption)
                    if take:
                        # stretch the previous match so it covers this token
                        pmr = matches[-1]
                        mr = matchresult(pmr.start, self.ts.endpos, pmr.text,
                                         None)
                        matches[-1] = mr
                    else:
                        matches.append(self.unknown(token))
                elif self.manpage.partialmatch:
                    logger.info('attemping to do a partial match')

                    # all or nothing: one unknown piece makes the whole token
                    # unknown
                    m = attempt_fuzzy(token)
                    if any(mm.unknown for mm in m):
                        logger.info('one of %r was unknown', token)
                        matches.append(self.unknown(token))
                    else:
                        matches += m
                        self.pos += len(token)
                elif self.manpage.arguments:
                    # use the help text of the first positional argument
                    d = self.manpage.arguments
                    k = list(d.keys())[0]
                    logger.info('got arguments, using %r', k)
                    text = d[k]
                    mr = matchresult(self.pos, self.ts.endpos, text, None)
                    matches.append(mr)
                else:
                    matches.append(self.unknown(token))

        def debugmatch():
            '''one line per match: index, matched source text, help text'''
            s = '\n'.join([
                '%d) %r = %r' % (i, self.s[m.start:m.end], m.text)
                for i, m in enumerate(matches)
            ])
            return s

        logger.debug('%r matches:\n%s', self.s, debugmatch())

        matches = self._mergeunknowns(matches)
        matches = self._mergeadjacent(matches)

        # add matchresult.match to existing matches
        for i, m in enumerate(matches):
            assert m.end <= len(self.s), '%d %d' % (m.end, len(self.s))
            matches[i] = matchresult(m.start, m.end, m.text,
                                     self.s[m.start:m.end])

        # the remaining man pages are returned without any matches
        r = [(self.manpage.name, matches)]
        for mp in mps[1:]:
            r.append((mp, None))
        return r

예제 #5

0

파일 보기

파일: matcher.py 프로젝트: OmarIthawi/explainshell

    def match(self):
        '''parse s and return a list of matchresult

        match works as follows:
        - tokenize the string using options.tokenize
        - look up a man page that matches the first token
        - check if the man page has 'subcommands', e.g. git commit, if so
          try to find a man page for the first two tokens
        - iterate all tokens
          - search the token as is
          - if the token is a short option (-abc) try to look up each option
            individually
          - if the previous match expected an arg, merge this token with
            the previous one
          - partialmatch: if this man page allows options to start without '-',
            try to match all characters individually (e.g. tar xzvf)
          - check if this man page has any positional arguments
          - mark this token as unknown

        after we finish processing all tokens, we:
        - merge unknown consecutive matches to become one matchresult
        - merge adjacent matchresults that have the same help text (e.g. if we
          had -vvv)

        the result is a list of tuples: (self.manpage.name, matches) comes
        first, matches being the matchresults found in self.s; each additional
        man page returned by findmanpages follows as (manpage, None).

        the command's own matchresult uses the start/end positions handed out
        by nexttoken instead of (0, len(token)), because a token's text may be
        shorter than its source span (quoted command, or several spaces
        between the words of a multicommand).

        NOTE(review): findmanpages presumably raises
        errors.ProgramDoesNotExist for an unknown first token and that is not
        handled here - confirm.
        '''
        logger.info('matching string %r', self.s)
        self.tokens = util.peekable(options.tokenize(self.s))
        #logger.info('tokenized %r to %s', self.s, [t[1] for t in self.tokens])
        self.ts = None

        def attempt_fuzzy(chars):
            '''try to match each character of chars as a separate option

            gives back a list with one matchresult per piece (unknown when no
            option was found); if chars starts with '-' and its first option
            expects an arg, a single matchresult spanning all of chars is
            returned instead. self.pos is advanced during the scan and reset
            before the normal return.
            '''
            m = []
            if chars[0] == '-':
                # '-abc' -> ['-a', 'b', 'c']
                tokens = [chars[0:2]] + list(chars[2:])
                considerarg = True
            else:
                tokens = list(chars)
                considerarg = False

            oldp = self.pos
            for t in tokens:
                # options are looked up with a leading dash
                op = t if t[0] == '-' else '-' + t
                option = self.find_option(op)
                if option:
                    # only the very first piece can take the rest as its arg
                    if considerarg and not m and option.expectsarg:
                        logger.info('option %r expected an arg, taking the rest too', option)
                        return [matchresult(self.pos, self.pos+len(chars), option.text, None)]

                    mr = matchresult(self.pos, self.pos+len(t), option.text, None)
                    m.append(mr)
                else:
                    m.append(self.unknown(t, self.pos+len(t)))
                self.pos += len(t)
            self.pos = oldp
            return m

        # self.pos/endpos mark where the command sits inside self.s
        self.pos, endpos, token = self.nexttoken()
        mps = self.findmanpages(token)
        self.manpage = mps[0]
        if self.manpage.multicommand and self.tokens.hasnext():
            try:
                multi = '%s %s' % (token, self.tokens.peek()[2])
                logger.info('%r is a multicommand, trying to get another token and look up %r', self.manpage, multi)
                mps = self.findmanpages(multi)
                self.manpage = mps[0]
                # the second word is part of the command, extend its end
                _, endpos, _ = self.nexttoken()
                token = multi
            except errors.ProgramDoesNotExist:
                logger.info('no manpage %r for multicommand %r', multi, self.manpage)

        matches = []
        matches.append(matchresult(self.pos, endpos, self.manpage.synopsis, None))

        while self.tokens.hasnext():
            self.pos, endpos, token = self.nexttoken()
            logger.info('trying to match token: %r', token)

            # keep the option matched before this token, it may want an arg
            self._prevoption = self._currentoption
            option = self.find_option(token)
            if option:
                logger.info('found an exact match for %r: %r', token, option)
                mr = matchresult(self.pos, self.ts.endpos, option.text, None)
                matches.append(mr)
            else:
                if token != '-' and token.startswith('-') and not token.startswith('--'):
                    logger.debug('looks like a short option')
                    if len(token) > 2:
                        logger.info("trying to split it up")
                        matches.extend(attempt_fuzzy(token))
                        self.pos += len(token)
                    else:
                        matches.append(self.unknown(token))
                elif self._prevoption and self._prevoption.expectsarg:
                    logger.info("previous option possibly expected an arg, and we can't"
                            " find an option to match the current token, assuming it's an arg")
                    # expectsarg is either merely truthy or the list of
                    # values allowed as the arg
                    ea = self._prevoption.expectsarg
                    possibleargs = ea if isinstance(ea, list) else []
                    take = True
                    if possibleargs and token not in possibleargs:
                        take = False
                        logger.info('token %r not in list of possible args %r for %r',
                                    token, possibleargs, self._prevoption)
                    if take:
                        # grow the previous match to include this token
                        pmr = matches[-1]
                        mr = matchresult(pmr.start, self.ts.endpos, pmr.text, None)
                        matches[-1] = mr
                    else:
                        matches.append(self.unknown(token))
                elif self.manpage.partialmatch:
                    logger.info('attemping to do a partial match')

                    # a single unknown piece turns the whole token unknown
                    m = attempt_fuzzy(token)
                    if any(mm.unknown for mm in m):
                        logger.info('one of %r was unknown', token)
                        matches.append(self.unknown(token))
                    else:
                        matches += m
                        self.pos += len(token)
                elif self.manpage.arguments:
                    # explain the token with the first positional argument
                    d = self.manpage.arguments
                    k = list(d.keys())[0]
                    logger.info('got arguments, using %r', k)
                    text = d[k]
                    mr = matchresult(self.pos, self.ts.endpos, text, None)
                    matches.append(mr)
                else:
                    matches.append(self.unknown(token))

        def debugmatch():
            '''render matches as numbered "source text = help text" lines'''
            s = '\n'.join(['%d) %r = %r' % (i, self.s[m.start:m.end], m.text) for i, m in enumerate(matches)])
            return s

        logger.debug('%r matches:\n%s', self.s, debugmatch())

        matches = self._mergeunknowns(matches)
        matches = self._mergeadjacent(matches)

        # add matchresult.match to existing matches
        for i, m in enumerate(matches):
            assert m.end <= len(self.s), '%d %d' % (m.end, len(self.s))
            matches[i] = matchresult(m.start, m.end, m.text, self.s[m.start:m.end])

        # man pages other than the chosen one carry no matches
        r = [(self.manpage.name, matches)]
        for mp in mps[1:]:
            r.append((mp, None))
        return r