Exemple #1
0
def explain(section, program):
    '''render the explanation page for program

    when the request has an 'args' query parameter, the command line
    '<program> <args>' is run through the matcher and the resulting matches
    are rendered with explain.html. otherwise the man page found for program
    (and section) is rendered with options.html.

    an errors.ProgramDoesNotExist raised along the way is rendered with
    error.html'''
    s = store.store('explainshell', config.MONGO_URI)
    try:
        if 'args' in request.args:
            args = request.args['args']
            command = '%s %s' % (program, args)
            matcher_ = matcher.matcher(command, s, section)
            mrs = matcher_.match()
            # the first result carries the list of matches, the first item
            # of each remaining result is handed to helpers.others below
            mr = mrs[0][1]
            matches = []
            it = util.peekable(iter(mr))
            while it.hasnext():
                m = it.next()
                # gap between the end of this match and the start of the
                # next one, the last match gets no trailing spaces
                spaces = 0
                if it.hasnext():
                    spaces = it.peek().start - m.end
                text = m.text
                if text:
                    text = text.decode('utf-8')
                matches.append({
                    'match': m.match,
                    'unknown': m.unknown,
                    'text': text,
                    'spaces': ' ' * spaces,
                })

            # the first match is decorated with details of the man page
            first = matches[0]
            first['section'] = matcher_.manpage.section
            first['match'] = '%s(%s)' % (first['match'], first['section'])
            # drop the last 5 characters of the source name (presumably a
            # file extension - TODO confirm)
            first['source'] = matcher_.manpage.source[:-5]
            others = helpers.others([x[0] for x in mrs[1:]])

            return render_template('explain.html',
                                   program=first,
                                   matches=matches,
                                   othersections=others,
                                   getargs=args)
        else:
            logger.info('/explain section=%r program=%r', section, program)
            mps = s.findmanpage(program, section)
            # the first man page is displayed, whatever is left in mps is
            # passed to helpers.others
            mp = mps.pop(0)
            program = mp.namesection

            mp = {
                'source': mp.source[:-3],
                'section': mp.section,
                'program': program,
                'synopsis': mp.synopsis,
                'options': [o.text.decode('utf-8') for o in mp.options]
            }

            othersections = helpers.others(mps)
            logger.info('others: %s', othersections)
            # reuse the value that was just logged instead of computing it
            # a second time
            return render_template('options.html',
                                   mp=mp,
                                   othersections=othersections)
    except errors.ProgramDoesNotExist as e:
        return render_template('error.html', prog=e.args[0])
Exemple #2
0
def explain(section, program):
    '''render the explanation page for program

    when the request has an 'args' query parameter, the command line
    '<program> <args>' is run through the matcher and the resulting matches
    are rendered with explain.html; if program is None, the text before the
    first space of args is used as the program and the remainder as its
    arguments. otherwise the man page found for program (and section) is
    rendered with options.html.

    an errors.ProgramDoesNotExist raised along the way is rendered with
    error.html'''
    s = store.store('explainshell', config.MONGO_URI)
    try:
        if 'args' in request.args:
            args = request.args['args']
            if program is None:
                # split on the first space only: everything before it is the
                # program, everything after it is kept as is
                program, _, args = args.partition(' ')
            command = '%s %s' % (program, args)
            matcher_ = matcher.matcher(command, s, section)
            mrs = matcher_.match()
            # the first result carries the list of matches, the first item
            # of each remaining result is handed to helpers.others below
            mr = mrs[0][1]
            matches = []
            it = util.peekable(iter(mr))
            while it.hasnext():
                m = it.next()
                # gap between the end of this match and the start of the
                # next one, the last match gets no trailing spaces
                spaces = 0
                if it.hasnext():
                    spaces = it.peek().start - m.end
                text = m.text
                if text:
                    text = text.decode('utf-8')
                matches.append({'match' : m.match, 'unknown' : m.unknown,
                                'text' : text, 'spaces' : ' ' * spaces})

            # the first match is decorated with details of the man page
            first = matches[0]
            first['section'] = matcher_.manpage.section
            first['match'] = '%s(%s)' % (first['match'], first['section'])
            # drop the last 5 characters of the source name (presumably a
            # file extension - TODO confirm)
            first['source'] = matcher_.manpage.source[:-5]
            others = helpers.others([x[0] for x in mrs[1:]])

            return render_template('explain.html', program=first,
                                   matches=matches, othersections=others,
                                   getargs=args)
        else:
            logger.info('/explain section=%r program=%r', section, program)
            mps = s.findmanpage(program, section)
            # the first man page is displayed, whatever is left in mps is
            # passed to helpers.others
            mp = mps.pop(0)
            program = mp.namesection

            mp = {'source' : mp.source[:-3],
                  'section' : mp.section,
                  'program' : program,
                  'synopsis' : mp.synopsis,
                  'options' : [o.text.decode('utf-8') for o in mp.options]}

            othersections = helpers.others(mps)
            logger.info('others: %s', othersections)
            # reuse the value that was just logged instead of computing it
            # a second time
            return render_template('options.html', mp=mp,
                                   othersections=othersections)
    except errors.ProgramDoesNotExist as e:
        return render_template('error.html', prog=e.args[0])
Exemple #3
0
 def _mergeadjacent(self, matches):
     '''merge runs of consecutive matches that have the same text

     a merged match starts where the first match of the run starts and ends
     where the last one ends, keeping the text and match of the first.

     returns a new list, an empty matches gives an empty list'''
     merged = []
     it = util.peekable(iter(matches))
     # nothing to merge, and the it.next() below needs at least one item
     if not it.hasnext():
         return merged
     curr = it.next()
     while it.hasnext():
         upcoming = it.peek()
         if curr.text != upcoming.text:
             # different text, curr is final
             merged.append(curr)
             curr = it.next()
         else:
             logger.debug('merging adjacent identical matches %d and %d', it.index - 1, it.index)
             it.next()
             # stretch curr so it also covers the match we just consumed
             curr = matchresult(curr.start, upcoming.end, curr.text, curr.match)
     merged.append(curr)
     return merged
Exemple #4
0
 def _mergeadjacent(self, matches):
     '''merge runs of consecutive matches that have the same text

     a merged match starts where the first match of the run starts and ends
     where the last one ends, keeping the text and match of the first.

     returns a new list, an empty matches gives an empty list'''
     merged = []
     it = util.peekable(iter(matches))
     # nothing to merge, and the it.next() below needs at least one item
     if not it.hasnext():
         return merged
     curr = it.next()
     while it.hasnext():
         upcoming = it.peek()
         if curr.text != upcoming.text:
             # different text, curr is final
             merged.append(curr)
             curr = it.next()
         else:
             logger.debug('merging adjacent identical matches %d and %d',
                          it.index - 1, it.index)
             it.next()
             # stretch curr so it also covers the match we just consumed
             curr = matchresult(curr.start, upcoming.end, curr.text, curr.match)
     merged.append(curr)
     return merged
Exemple #5
0
def explaincommand(command, store):
    '''match command against store and prepare the result for display

    returns a tuple (matches, helptext):
    - matches is a flat list with one dict per match (as created by
      _makematch and formatmatch), sorted by start position, where each
      dict also has a 'spaces' entry holding the spaces that separate it
      from the next match
    - helptext is a list of (text, id) pairs sorted by the start position
      of the first match that was assigned that id'''
    matcher_ = matcher.matcher(command, store)
    groups = matcher_.match()
    expansions = matcher_.expansions

    shellgroup = groups[0]
    commandgroups = groups[1:]

    # save a mapping between the help text to its assigned id,
    # we're going to reuse ids that have the same text
    texttoid = {}

    # remember where each assigned id has started in the source,
    # we're going to use it later on to sort the help text by start
    # position
    idstartpos = {}

    def groupmatches(group):
        # build the list of match dicts for the results of a single group,
        # assigning help ids through texttoid/idstartpos as we go
        l = []
        for m in group.results:
            commandclass = group.name
            helpclass = 'help-%d' % len(texttoid)
            text = m.text
            if text:
                text = text.decode('utf-8')
                helpclass = texttoid.setdefault(text, helpclass)
            else:
                # no help text means the match is unknown. in the shell
                # group this is possible when our parser left an unparsed
                # remainder, see matcher._markunparsedunknown
                commandclass += ' unknown'
                helpclass = ''
            if helpclass:
                idstartpos.setdefault(helpclass, m.start)

            d = _makematch(m.start, m.end, m.match, commandclass, helpclass)
            formatmatch(d, m, expansions)

            l.append(d)
        return l

    matches = [groupmatches(shellgroup)]

    for commandgroup in commandgroups:
        l = groupmatches(commandgroup)

        # the first match of a command group is the command itself
        d = l[0]
        d['commandclass'] += ' simplecommandstart'
        if commandgroup.manpage:
            d['name'] = commandgroup.manpage.name
            d['section'] = commandgroup.manpage.section
            if '.' not in d['match']:
                d['match'] = '%s(%s)' % (d['match'], d['section'])
            d['suggestions'] = commandgroup.suggestions
            d['source'] = commandgroup.manpage.source[:-5]
        matches.append(l)

    matches = list(itertools.chain.from_iterable(matches))
    helpers.suggestions(matches, command)

    # _checkoverlaps(matcher_.s, matches)
    matches.sort(key=lambda d: d['start'])

    it = util.peekable(iter(matches))
    while it.hasnext():
        m = it.next()
        spaces = 0
        if it.hasnext():
            spaces = it.peek()['start'] - m['end']
        m['spaces'] = ' ' * spaces

    # items are (text, id) pairs. index the pair instead of unpacking it in
    # the lambda signature, which is a syntax error on python 3
    helptext = sorted(texttoid.items(), key=lambda item: idstartpos[item[1]])

    return matches, helptext
Exemple #6
0
def explaincommand(command, store):
    '''match command against store and prepare the result for display

    returns a tuple (matches, helptext):
    - matches is a flat list with one dict per match, sorted by start
      position, with the keys 'match', 'start', 'end', 'commandclass',
      'helpclass' and 'spaces' (the spaces that separate it from the next
      match). the first match of a command group that has a man page also
      gets 'name', 'section', 'suggestions' and 'source'
    - helptext is a list of (text, id) pairs sorted by the start position
      of the first match that was assigned that id'''
    matcher_ = matcher.matcher(command, store)
    groups = matcher_.match()
    shellgroup = groups[0]
    commandgroups = groups[1:]

    # save a mapping between the help text to its assigned id,
    # we're going to reuse ids that have the same text
    texttoid = {}

    # remember where each assigned id has started in the source,
    # we're going to use it later on to sort the help text by start
    # position
    idstartpos = {}

    def groupmatches(group):
        # build the list of match dicts for the results of a single group,
        # assigning help ids through texttoid/idstartpos as we go
        l = []
        for m in group.results:
            commandclass = group.name
            helpclass = 'help-%d' % len(texttoid)
            text = m.text
            if text:
                text = text.decode('utf-8')
                helpclass = texttoid.setdefault(text, helpclass)
            else:
                # no help text means the match is unknown. in the shell
                # group this is possible when our parser left an unparsed
                # remainder, see matcher._markunparsedunknown
                commandclass += ' unknown'
                helpclass = ''
            if helpclass:
                idstartpos.setdefault(helpclass, m.start)
            l.append({'match' : m.match,
                      'start' : m.start, 'end' : m.end,
                      'commandclass' : commandclass,
                      'helpclass' : helpclass})
        return l

    matches = [groupmatches(shellgroup)]

    for commandgroup in commandgroups:
        l = groupmatches(commandgroup)

        # the first match of a command group is the command itself
        d = l[0]
        d['commandclass'] += ' simplecommandstart'
        if commandgroup.manpage:
            d['name'] = commandgroup.manpage.name
            d['section'] = commandgroup.manpage.section
            if '.' not in d['match']:
                d['match'] = '%s(%s)' % (d['match'], d['section'])
            d['suggestions'] = commandgroup.suggestions
            d['source'] = commandgroup.manpage.source[:-5]
        matches.append(l)

    matches = list(itertools.chain.from_iterable(matches))
    helpers.suggestions(matches, command)
    matches.sort(key=lambda d: d['start'])

    it = util.peekable(iter(matches))
    while it.hasnext():
        m = it.next()
        spaces = 0
        if it.hasnext():
            spaces = it.peek()['start'] - m['end']
        m['spaces'] = ' ' * spaces

    # items are (text, id) pairs. index the pair instead of unpacking it in
    # the lambda signature, which is a syntax error on python 3
    helptext = sorted(texttoid.items(), key=lambda item: idstartpos[item[1]])
    return matches, helptext
Exemple #7
0
def tokenize(s):
    '''tokenize s, we use (the limited) shlex module for now, in the future
    this could be improved to a minimal bash parser

    another bit of information we return besides the tokens themselves is the start
    and end position of the token in the original string. this is tricky since
    shlex doesn't provide it and we have to look into its string pointer

    this is a generator that yields a tokenstate(startpos, endpos, token) for
    every token. a token that contains '=' is split around the first '=':
    'x=y' yields 'x' and '=y', 'x=' yields 'x' and '=', '=y' and '=' are
    yielded as is.

    a ValueError raised while tokenizing is re-raised as errors.ParsingError
    along with s and the current position in it'''
    s = s.strip()
    stream = StringIO(s)

    lexer = shlex.shlex(stream, posix=True)
    lexer.whitespace_split = True
    lexer.commenters = ''

    startpos = 0
    it = util.peekable(lexer)
    try:
        for t in it:
            endpos = stream.tell()

            # remember endpos, we're going to peek next which will move the underlying
            # string pointer
            tt = endpos

            # if we have another token, backup one char to not include the space
            # between args
            if it.hasnext():
                endpos -= 1

            # startpos is the previous endpos which may include a lot of spaces
            # between arguments

            # before: 'a     b'
            #           ^
            while s[startpos].isspace():
                startpos += 1
            # after:  'a     b'
            #                ^

            yielded = False
            if '=' in t:
                x, y = t.split('=', 1)
                # was it something like 'x=..'?
                if x:
                    # was it 'x='?
                    if not y:
                        # we don't want to lose the =, so yield it by itself and
                        # it will be marked as unknown by the matcher

                        # yield 'x' and '='
                        yield tokenstate(startpos, startpos+len(x), x)
                        yield tokenstate(startpos+len(x), startpos+len(x)+1, '=')
                    else:
                        # yield 'x=..'
                        yield tokenstate(startpos, startpos+len(x), x)
                    yielded = True
                if y:
                    # yield '=y'
                    yield tokenstate(startpos+len(x), endpos, '=' + y)
                    yielded = True

            if not yielded:
                # no '=' in current token or it was literally just '='
                yield tokenstate(startpos, endpos, t)

            startpos = tt
    except ValueError as e:
        raise errors.ParsingError(str(e), s, stream.tell())
Exemple #8
0
    def match(self):
        '''parse s and return the matches that were found for it

        match works as follows:
        - tokenize the string using options.tokenize
        - look up a man page that matches the first token
        - check if the man page has 'subcommands', e.g. git commit, if so
          try to find a man page for the first two tokens
        - iterate all tokens
          - search the token as is
          - if the token is a short option (-abc) try to look up each option
            individually
          - if the previous match expected an arg, merge this token with
            the previous one
          - partialmatch: if this man page allows options to start without '-',
            try to match all characters individually (e.g. tar xzvf)
          - check if this man page has any positional arguments
          - mark this token as unknown

        after we finish processing all tokens, we:
        - merge unknown consecutive matches to become one matchresult
        - merge adjacent matchresults that have the same help text (e.g. if we
          had -vvv)

        returns a list of tuples: the first one is (name of the selected man
        page, list of matchresult), followed by a (manpage, None) tuple for
        each of the remaining man pages returned by findmanpages
        '''
        logger.info('matching string %r', self.s)
        self.tokens = util.peekable(options.tokenize(self.s))
        #logger.info('tokenized %r to %s', self.s, [t[1] for t in self.tokens])
        self.ts = None

        # try to match every character of chars as an option of its own,
        # returns a list of matchresult (known or unknown) in the order of
        # the characters. self.pos is advanced while going over the
        # characters and restored before returning
        def attempt_fuzzy(chars):
            m = []
            if chars[0] == '-':
                # '-abc' is looked up as '-a', 'b', 'c'
                tokens = [chars[0:2]] + list(chars[2:])
                considerarg = True
            else:
                tokens = list(chars)
                considerarg = False

            oldp = self.pos
            for t in tokens:
                # options are looked up with a leading '-'
                op = t if t[0] == '-' else '-' + t
                option = self.find_option(op)
                if option:
                    # only when chars started with '-': if the very first
                    # option expects an arg, the rest of chars is considered
                    # to be that arg and the result is a single match
                    if considerarg and not m and option.expectsarg:
                        logger.info(
                            'option %r expected an arg, taking the rest too',
                            option)
                        return [
                            matchresult(self.pos, self.pos + len(chars),
                                        option.text, None)
                        ]

                    mr = matchresult(self.pos, self.pos + len(t), option.text,
                                     None)
                    m.append(mr)
                else:
                    m.append(self.unknown(t, self.pos + len(t)))
                self.pos += len(t)
            # the callers below advance self.pos themselves
            self.pos = oldp
            return m

        # the first token is the program, use it to select a man page
        self.pos, endpos, token = self.nexttoken()
        mps = self.findmanpages(token)
        self.manpage = mps[0]
        if self.manpage.multicommand and self.tokens.hasnext():
            try:
                # peek at the next token, it is only consumed if a man page
                # is found for the two tokens together
                multi = '%s %s' % (token, self.tokens.peek()[2])
                logger.info(
                    '%r is a multicommand, trying to get another token and look up %r',
                    self.manpage, multi)
                mps = self.findmanpages(multi)
                self.manpage = mps[0]
                self.nexttoken()
                token = multi
            except errors.ProgramDoesNotExist:
                logger.info('no manpage %r for multicommand %r', multi,
                            self.manpage)

        option = None
        matches = []
        # the first match is the program itself with the synopsis of the
        # man page as its text
        matches.append(matchresult(0, len(token), self.manpage.synopsis, None))

        while self.tokens.hasnext():
            self.pos, endpos, token = self.nexttoken()
            logger.info('trying to match token: %r', token)

            # keep the option of the previous token around, it is used below
            # to check whether this token may be its arg
            self._prevoption = self._currentoption
            option = self.find_option(token)
            if option:
                logger.info('found an exact match for %r: %r', token, option)
                mr = matchresult(self.pos, self.ts.endpos, option.text, None)
                matches.append(mr)
            else:
                # starts with a single '-' and isn't just '-'
                if token != '-' and token.startswith(
                        '-') and not token.startswith('--'):
                    logger.debug('looks like a short option')
                    if len(token) > 2:
                        logger.info("trying to split it up")
                        matches.extend(attempt_fuzzy(token))
                        self.pos += len(token)
                    else:
                        matches.append(self.unknown(token))
                elif self._prevoption and self._prevoption.expectsarg:
                    logger.info(
                        "previous option possibly expected an arg, and we can't"
                        " find an option to match the current token, assuming it's an arg"
                    )
                    ea = self._prevoption.expectsarg
                    # when expectsarg is a list, the token is taken as an arg
                    # only if it is one of the items in it
                    possibleargs = ea if isinstance(ea, list) else []
                    take = True
                    if possibleargs and token not in possibleargs:
                        take = False
                        logger.info(
                            'token %r not in list of possible args %r for %r',
                            token, possibleargs, self._prevoption)
                    if take:
                        # stretch the previous match to cover this token too
                        pmr = matches[-1]
                        mr = matchresult(pmr.start, self.ts.endpos, pmr.text,
                                         None)
                        matches[-1] = mr
                    else:
                        matches.append(self.unknown(token))
                elif self.manpage.partialmatch:
                    logger.info('attemping to do a partial match')

                    # all or nothing: a single unknown character makes the
                    # entire token unknown
                    m = attempt_fuzzy(token)
                    if any(mm.unknown for mm in m):
                        logger.info('one of %r was unknown', token)
                        matches.append(self.unknown(token))
                    else:
                        matches += m
                        self.pos += len(token)
                elif self.manpage.arguments:
                    # use the text of the first key that arguments gives us
                    d = self.manpage.arguments
                    k = list(d.keys())[0]
                    logger.info('got arguments, using %r', k)
                    text = d[k]
                    mr = matchresult(self.pos, self.ts.endpos, text, None)
                    matches.append(mr)
                else:
                    matches.append(self.unknown(token))

        # one line per match: its index, the part of s it covers and its text
        def debugmatch():
            s = '\n'.join([
                '%d) %r = %r' % (i, self.s[m.start:m.end], m.text)
                for i, m in enumerate(matches)
            ])
            return s

        logger.debug('%r matches:\n%s', self.s, debugmatch())

        matches = self._mergeunknowns(matches)
        matches = self._mergeadjacent(matches)

        # add matchresult.match to existing matches
        for i, m in enumerate(matches):
            assert m.end <= len(self.s), '%d %d' % (m.end, len(self.s))
            matches[i] = matchresult(m.start, m.end, m.text,
                                     self.s[m.start:m.end])

        # the selected man page comes first with its matches, the other man
        # pages that were found follow without any matches
        r = [(self.manpage.name, matches)]
        for mp in mps[1:]:
            r.append((mp, None))
        return r
Exemple #9
0
def tokenize(s):
    '''split s into tokens using the (limited) shlex module, a minimal bash
    parser could replace it in the future

    this is a generator that yields a tokenstate(start, end, token) for every
    token, where start and end are the position of the token in the stripped
    s. shlex doesn't provide the positions, so they're worked out from the
    position of the stream it reads from.

    a token that contains '=' is split around the first '=': 'x=y' yields 'x'
    and '=y', 'x=' yields 'x' and '=', '=y' and '=' are yielded as is'''
    s = s.strip()
    stream = StringIO(s)

    lexer = shlex.shlex(stream, posix=True)
    lexer.whitespace_split = True
    lexer.commenters = ''

    tokens = util.peekable(lexer)
    begin = 0
    for token in tokens:
        # how far the stream was read in order to produce this token. this
        # has to be saved before checking for another token, since peeking
        # moves the underlying string pointer
        consumed = stream.tell()

        # if we have another token, back up one char to not include the
        # space between args
        end = consumed - 1 if tokens.hasnext() else consumed

        # begin is where reading the previous token has stopped, there may
        # be a lot of spaces between it and the current token
        #
        # before: 'a     b'
        #           ^
        # after:  'a     b'
        #                ^
        while s[begin].isspace():
            begin += 1

        name, sep, value = token.partition('=')
        if not sep or not (name or value):
            # no '=' in current token or it was literally just '='
            yield tokenstate(begin, end, token)
        else:
            # position of the '=' in s
            eq = begin + len(name)
            if name:
                # the 'x' of 'x=..'
                yield tokenstate(begin, eq, name)
            if value:
                # '=y'
                yield tokenstate(eq, end, '=' + value)
            else:
                # it was 'x=', we don't want to lose the = so yield it by
                # itself and it will be marked as unknown by the matcher
                yield tokenstate(eq, eq + 1, '=')

        begin = consumed
Exemple #10
0
    def match(self):
        '''parse s and return the matches that were found for it

        match works as follows:
        - tokenize the string using options.tokenize
        - look up a man page that matches the first token
        - check if the man page has 'subcommands', e.g. git commit, if so
          try to find a man page for the first two tokens
        - iterate all tokens
          - search the token as is
          - if the token is a short option (-abc) try to look up each option
            individually
          - if the previous match expected an arg, merge this token with
            the previous one
          - partialmatch: if this man page allows options to start without '-',
            try to match all characters individually (e.g. tar xzvf)
          - check if this man page has any positional arguments
          - mark this token as unknown

        after we finish processing all tokens, we:
        - merge unknown consecutive matches to become one matchresult
        - merge adjacent matchresults that have the same help text (e.g. if we
          had -vvv)

        returns a list of tuples: the first one is (name of the selected man
        page, list of matchresult), followed by a (manpage, None) tuple for
        each of the remaining man pages returned by findmanpages
        '''
        logger.info('matching string %r', self.s)
        self.tokens = util.peekable(options.tokenize(self.s))
        #logger.info('tokenized %r to %s', self.s, [t[1] for t in self.tokens])
        self.ts = None

        # try to match every character of chars as an option of its own,
        # returns a list of matchresult (known or unknown) in the order of
        # the characters. self.pos is advanced while going over the
        # characters and restored before returning
        def attempt_fuzzy(chars):
            m = []
            if chars[0] == '-':
                # '-abc' is looked up as '-a', 'b', 'c'
                tokens = [chars[0:2]] + list(chars[2:])
                considerarg = True
            else:
                tokens = list(chars)
                considerarg = False

            oldp = self.pos
            for t in tokens:
                # options are looked up with a leading '-'
                op = t if t[0] == '-' else '-' + t
                option = self.find_option(op)
                if option:
                    # only when chars started with '-': if the very first
                    # option expects an arg, the rest of chars is considered
                    # to be that arg and the result is a single match
                    if considerarg and not m and option.expectsarg:
                        logger.info('option %r expected an arg, taking the rest too', option)
                        return [matchresult(self.pos, self.pos+len(chars), option.text, None)]

                    mr = matchresult(self.pos, self.pos+len(t), option.text, None)
                    m.append(mr)
                else:
                    m.append(self.unknown(t, self.pos+len(t)))
                self.pos += len(t)
            # the callers below advance self.pos themselves
            self.pos = oldp
            return m

        # the first token is the program, use it to select a man page
        self.pos, endpos, token = self.nexttoken()
        mps = self.findmanpages(token)
        self.manpage = mps[0]
        if self.manpage.multicommand and self.tokens.hasnext():
            try:
                # peek at the next token, it is only consumed if a man page
                # is found for the two tokens together
                multi = '%s %s' % (token, self.tokens.peek()[2])
                logger.info('%r is a multicommand, trying to get another token and look up %r', self.manpage, multi)
                mps = self.findmanpages(multi)
                self.manpage = mps[0]
                self.nexttoken()
                token = multi
            except errors.ProgramDoesNotExist:
                logger.info('no manpage %r for multicommand %r', multi, self.manpage)

        option = None
        matches = []
        # the first match is the program itself with the synopsis of the
        # man page as its text
        matches.append(matchresult(0, len(token), self.manpage.synopsis, None))

        while self.tokens.hasnext():
            self.pos, endpos, token = self.nexttoken()
            logger.info('trying to match token: %r', token)

            # keep the option of the previous token around, it is used below
            # to check whether this token may be its arg
            self._prevoption = self._currentoption
            option = self.find_option(token)
            if option:
                logger.info('found an exact match for %r: %r', token, option)
                mr = matchresult(self.pos, self.ts.endpos, option.text, None)
                matches.append(mr)
            else:
                # starts with a single '-' and isn't just '-'
                if token != '-' and token.startswith('-') and not token.startswith('--'):
                    logger.debug('looks like a short option')
                    if len(token) > 2:
                        logger.info("trying to split it up")
                        matches.extend(attempt_fuzzy(token))
                        self.pos += len(token)
                    else:
                        matches.append(self.unknown(token))
                elif self._prevoption and self._prevoption.expectsarg:
                    logger.info("previous option possibly expected an arg, and we can't"
                            " find an option to match the current token, assuming it's an arg")
                    ea = self._prevoption.expectsarg
                    # when expectsarg is a list, the token is taken as an arg
                    # only if it is one of the items in it
                    possibleargs = ea if isinstance(ea, list) else []
                    take = True
                    if possibleargs and token not in possibleargs:
                        take = False
                        logger.info('token %r not in list of possible args %r for %r',
                                    token, possibleargs, self._prevoption)
                    if take:
                        # stretch the previous match to cover this token too
                        pmr = matches[-1]
                        mr = matchresult(pmr.start, self.ts.endpos, pmr.text, None)
                        matches[-1] = mr
                    else:
                        matches.append(self.unknown(token))
                elif self.manpage.partialmatch:
                    logger.info('attemping to do a partial match')

                    # all or nothing: a single unknown character makes the
                    # entire token unknown
                    m = attempt_fuzzy(token)
                    if any(mm.unknown for mm in m):
                        logger.info('one of %r was unknown', token)
                        matches.append(self.unknown(token))
                    else:
                        matches += m
                        self.pos += len(token)
                elif self.manpage.arguments:
                    # use the text of the first key that arguments gives us
                    d = self.manpage.arguments
                    k = list(d.keys())[0]
                    logger.info('got arguments, using %r', k)
                    text = d[k]
                    mr = matchresult(self.pos, self.ts.endpos, text, None)
                    matches.append(mr)
                else:
                    matches.append(self.unknown(token))

        # one line per match: its index, the part of s it covers and its text
        def debugmatch():
            s = '\n'.join(['%d) %r = %r' % (i, self.s[m.start:m.end], m.text) for i, m in enumerate(matches)])
            return s

        logger.debug('%r matches:\n%s', self.s, debugmatch())

        matches = self._mergeunknowns(matches)
        matches = self._mergeadjacent(matches)

        # add matchresult.match to existing matches
        for i, m in enumerate(matches):
            assert m.end <= len(self.s), '%d %d' % (m.end, len(self.s))
            matches[i] = matchresult(m.start, m.end, m.text, self.s[m.start:m.end])

        # the selected man page comes first with its matches, the other man
        # pages that were found follow without any matches
        r = [(self.manpage.name, matches)]
        for mp in mps[1:]:
            r.append((mp, None))
        return r