def explain(section, program):
    '''render the page for program, or for a full command line when the
    request carries an 'args' parameter

    with 'args' in the request, '<program> <args>' is run through the
    matcher and 'explain.html' is rendered with one dict per match (keys
    'match', 'unknown', 'text' and 'spaces'); the first dict also gets
    'section' and 'source' and has the section appended to its 'match'

    without 'args', the man page found for program/section is rendered
    using 'options.html'

    if errors.ProgramDoesNotExist is raised along the way, 'error.html' is
    rendered with the first argument of the exception as prog
    '''
    s = store.store('explainshell', config.MONGO_URI)
    try:
        if 'args' in request.args:
            args = request.args['args']
            command = '%s %s' % (program, args)
            matcher_ = matcher.matcher(command, s, section)
            mrs = matcher_.match()
            # only the first result carries matches, the remaining ones are
            # used below for the list of other sections
            mr = mrs[0][1]

            l = []
            it = util.peekable(iter(mr))
            while it.hasnext():
                m = it.next()
                # distance between the end of this match and the start of
                # the next one, nothing follows the last match
                spaces = 0
                if it.hasnext():
                    spaces = it.peek().start - m.end
                spaces = ' ' * spaces
                text = m.text
                if text:
                    text = text.decode('utf-8')
                d = {'match': m.match, 'unknown': m.unknown,
                     'text': text, 'spaces': spaces}
                l.append(d)

            # the first match gets the man page details
            d = l[0]
            d['section'] = matcher_.manpage.section
            d['match'] = '%s(%s)' % (d['match'], d['section'])
            # drops the last 5 characters of the source name (presumably a
            # suffix such as '.1.gz', TODO confirm)
            d['source'] = matcher_.manpage.source[:-5]

            others = helpers.others([x[0] for x in mrs[1:]])
            return render_template('explain.html', program=l[0], matches=l,
                                   othersections=others, getargs=args)

        logger.info('/explain section=%r program=%r', section, program)
        mps = s.findmanpage(program, section)
        # the first man page is displayed, whatever is left is offered as
        # other sections
        mp = mps.pop(0)
        program = mp.namesection
        mp = {'source': mp.source[:-3],
              'section': mp.section,
              'program': program,
              'synopsis': mp.synopsis,
              'options': [o.text.decode('utf-8') for o in mp.options]}
        # computed once and used for both the log line and the template
        othersections = helpers.others(mps)
        logger.info('others: %s', othersections)
        return render_template('options.html', mp=mp,
                               othersections=othersections)
    except errors.ProgramDoesNotExist as e:
        return render_template('error.html', prog=e.args[0])
def explain(section, program):
    '''render the page for program, or for a full command line when the
    request carries an 'args' parameter

    with 'args' in the request, '<program> <args>' is run through the
    matcher and 'explain.html' is rendered with one dict per match (keys
    'match', 'unknown', 'text' and 'spaces'); the first dict also gets
    'section' and 'source' and has the section appended to its 'match'.
    when program is None, the text up to the first space of args is used as
    the program and the remainder as the arguments

    without 'args', the man page found for program/section is rendered
    using 'options.html'

    if errors.ProgramDoesNotExist is raised along the way, 'error.html' is
    rendered with the first argument of the exception as prog
    '''
    s = store.store('explainshell', config.MONGO_URI)
    try:
        if 'args' in request.args:
            args = request.args['args']
            if program is None:
                # same result as splitting on ' ' and re-joining the tail,
                # but args is only scanned once
                program, _, args = args.partition(' ')
            command = '%s %s' % (program, args)
            matcher_ = matcher.matcher(command, s, section)
            mrs = matcher_.match()
            # only the first result carries matches, the remaining ones are
            # used below for the list of other sections
            mr = mrs[0][1]

            l = []
            it = util.peekable(iter(mr))
            while it.hasnext():
                m = it.next()
                # distance between the end of this match and the start of
                # the next one, nothing follows the last match
                spaces = 0
                if it.hasnext():
                    spaces = it.peek().start - m.end
                spaces = ' ' * spaces
                text = m.text
                if text:
                    text = text.decode('utf-8')
                d = {'match' : m.match, 'unknown' : m.unknown,
                     'text' : text, 'spaces' : spaces}
                l.append(d)

            # the first match gets the man page details
            d = l[0]
            d['section'] = matcher_.manpage.section
            d['match'] = '%s(%s)' % (d['match'], d['section'])
            # drops the last 5 characters of the source name (presumably a
            # suffix such as '.1.gz', TODO confirm)
            d['source'] = matcher_.manpage.source[:-5]

            others = helpers.others([x[0] for x in mrs[1:]])
            return render_template('explain.html', program=l[0], matches=l,
                                   othersections=others, getargs=args)

        logger.info('/explain section=%r program=%r', section, program)
        mps = s.findmanpage(program, section)
        # the first man page is displayed, whatever is left is offered as
        # other sections
        mp = mps.pop(0)
        program = mp.namesection
        mp = {'source' : mp.source[:-3],
              'section' : mp.section,
              'program' : program,
              'synopsis' : mp.synopsis,
              'options' : [o.text.decode('utf-8') for o in mp.options]}
        # computed once and used for both the log line and the template
        othersections = helpers.others(mps)
        logger.info('others: %s', othersections)
        return render_template('options.html', mp=mp,
                               othersections=othersections)
    except errors.ProgramDoesNotExist as e:
        return render_template('error.html', prog=e.args[0])
def _mergeadjacent(self, matches):
    '''collapse runs of neighbouring matches that share the same text

    walks matches in order; whenever the match being built and the one that
    follows it have equal text, they are replaced by a single matchresult
    that starts where the first started, ends where the second ends and
    keeps the text and match of the first

    returns the new list, matches itself is not modified
    '''
    result = []
    walker = util.peekable(iter(matches))
    current = walker.next()
    while walker.hasnext():
        upcoming = walker.peek()
        if current.text == upcoming.text:
            logger.debug('merging adjacent identical matches %d and %d',
                         walker.index - 1, walker.index)
            # consume the match we just peeked at and stretch the current
            # one to cover it
            walker.next()
            current = matchresult(current.start, upcoming.end,
                                  current.text, current.match)
        else:
            result.append(current)
            current = walker.next()
    # whatever we were still building is the last entry
    result.append(current)
    return result
def explaincommand(command, store):
    '''match command against store and prepare the results for display

    returns a tuple (matches, helptext):

    - matches is a flat list of the dicts built by _makematch for every
      result in every group, sorted by their 'start'. each dict also gets a
      'spaces' string sized to the gap up to the next match. the first
      match of every command group is tagged 'simplecommandstart' and, when
      the group has a man page, gets 'name', 'section', 'suggestions' and
      'source'
    - helptext is a list of (text, id) pairs ordered by the start position
      of the first match that was assigned the id
    '''
    matcher_ = matcher.matcher(command, store)
    groups = matcher_.match()
    expansions = matcher_.expansions

    shellgroup = groups[0]
    commandgroups = groups[1:]

    # save a mapping between the help text to its assigned id,
    # we're going to reuse ids that have the same text
    texttoid = {}

    # remember where each assigned id has started in the source,
    # we're going to use it later on to sort the help text by start
    # position
    idstartpos = {}

    def groupmatches(group):
        '''build the match dicts for the results of a single group, assigning
        help ids in texttoid/idstartpos on the way'''
        l = []
        for m in group.results:
            commandclass = group.name
            helpclass = 'help-%d' % len(texttoid)
            text = m.text
            if text:
                text = text.decode('utf-8')
                helpclass = texttoid.setdefault(text, helpclass)
            else:
                # unknowns in the shell group are possible when our parser left
                # an unparsed remainder, see matcher._markunparsedunknown
                commandclass += ' unknown'
                helpclass = ''
            if helpclass:
                idstartpos.setdefault(helpclass, m.start)

            d = _makematch(m.start, m.end, m.match, commandclass, helpclass)
            formatmatch(d, m, expansions)
            l.append(d)
        return l

    matches = [groupmatches(shellgroup)]
    for commandgroup in commandgroups:
        l = groupmatches(commandgroup)

        # the first match of the group gets the man page details
        d = l[0]
        d['commandclass'] += ' simplecommandstart'
        if commandgroup.manpage:
            d['name'] = commandgroup.manpage.name
            d['section'] = commandgroup.manpage.section
            if '.' not in d['match']:
                d['match'] = '%s(%s)' % (d['match'], d['section'])
            d['suggestions'] = commandgroup.suggestions
            # drops the last 5 characters of the source name (presumably a
            # suffix such as '.1.gz', TODO confirm)
            d['source'] = commandgroup.manpage.source[:-5]
        matches.append(l)

    matches = list(itertools.chain.from_iterable(matches))
    helpers.suggestions(matches, command)

    # _checkoverlaps(matcher_.s, matches)
    matches.sort(key=lambda d: d['start'])

    # record the gap between every match and the one that follows it,
    # nothing follows the last match
    it = util.peekable(iter(matches))
    while it.hasnext():
        m = it.next()
        spaces = 0
        if it.hasnext():
            spaces = it.peek()['start'] - m['end']
        m['spaces'] = ' ' * spaces

    # items are (text, id) pairs, order them by where the id first appeared
    helptext = sorted(texttoid.items(), key=lambda item: idstartpos[item[1]])

    return matches, helptext
def explaincommand(command, store):
    '''match command against store and prepare the results for display

    returns a tuple (matches, helptext):

    - matches is a flat list with one dict per result in every group (keys
      'match', 'start', 'end', 'commandclass' and 'helpclass'), sorted by
      'start'. each dict also gets a 'spaces' string sized to the gap up to
      the next match. the first match of every command group is tagged
      'simplecommandstart' and, when the group has a man page, gets 'name',
      'section', 'suggestions' and 'source'
    - helptext is a list of (text, id) pairs ordered by the start position
      of the first match that was assigned the id
    '''
    matcher_ = matcher.matcher(command, store)
    groups = matcher_.match()

    shellgroup = groups[0]
    commandgroups = groups[1:]

    # save a mapping between the help text to its assigned id,
    # we're going to reuse ids that have the same text
    texttoid = {}

    # remember where each assigned id has started in the source,
    # we're going to use it later on to sort the help text by start
    # position
    idstartpos = {}

    def groupmatches(group):
        '''build the match dicts for the results of a single group, assigning
        help ids in texttoid/idstartpos on the way'''
        l = []
        for m in group.results:
            commandclass = group.name
            helpclass = 'help-%d' % len(texttoid)
            text = m.text
            if text:
                text = text.decode('utf-8')
                helpclass = texttoid.setdefault(text, helpclass)
            else:
                # unknowns in the shell group are possible when our parser left
                # an unparsed remainder, see matcher._markunparsedunknown
                commandclass += ' unknown'
                helpclass = ''
            if helpclass:
                idstartpos.setdefault(helpclass, m.start)

            d = {'match' : m.match, 'start' : m.start, 'end' : m.end,
                 'commandclass' : commandclass, 'helpclass' : helpclass}
            l.append(d)
        return l

    matches = [groupmatches(shellgroup)]
    for commandgroup in commandgroups:
        l = groupmatches(commandgroup)

        # the first match of the group gets the man page details
        d = l[0]
        d['commandclass'] += ' simplecommandstart'
        if commandgroup.manpage:
            d['name'] = commandgroup.manpage.name
            d['section'] = commandgroup.manpage.section
            if '.' not in d['match']:
                d['match'] = '%s(%s)' % (d['match'], d['section'])
            d['suggestions'] = commandgroup.suggestions
            # drops the last 5 characters of the source name (presumably a
            # suffix such as '.1.gz', TODO confirm)
            d['source'] = commandgroup.manpage.source[:-5]
        matches.append(l)

    matches = list(itertools.chain.from_iterable(matches))
    helpers.suggestions(matches, command)

    matches.sort(key=lambda d: d['start'])

    # record the gap between every match and the one that follows it,
    # nothing follows the last match
    it = util.peekable(iter(matches))
    while it.hasnext():
        m = it.next()
        spaces = 0
        if it.hasnext():
            spaces = it.peek()['start'] - m['end']
        m['spaces'] = ' ' * spaces

    # items are (text, id) pairs, order them by where the id first appeared
    helptext = sorted(texttoid.items(), key=lambda item: idstartpos[item[1]])

    return matches, helptext
def tokenize(s):
    '''split s into tokens with shlex and yield a tokenstate for each piece

    every tokenstate carries the start and end position of the piece in the
    (stripped) string along with its text. shlex doesn't report positions,
    so they are derived from the read pointer of the stream it consumes

    a token of the form 'x=y' is yielded as two pieces, 'x' and '=y'; a
    token of the form 'x=' is yielded as 'x' and '='. anything else
    (including a token that starts with '=') is yielded as is

    a ValueError raised while tokenizing is re-raised as
    errors.ParsingError
    '''
    s = s.strip()
    stream = StringIO(s)
    lexer = shlex.shlex(stream, posix=True)
    lexer.whitespace_split = True
    lexer.commenters = ''

    tokens = util.peekable(lexer)
    cursor = 0

    try:
        for token in tokens:
            # grab the stream position before checking for another token,
            # peeking ahead moves the underlying string pointer
            consumed = stream.tell()

            # when more tokens follow, back up one char so the separator
            # between the arguments isn't included
            end = consumed - 1 if tokens.hasnext() else consumed

            # cursor sits where the previous token was consumed up to, skip
            # over the whitespace that precedes the current one
            while s[cursor].isspace():
                cursor += 1

            name, equals, value = token.partition('=')
            if equals and name:
                # 'x=..': the name always goes out by itself
                split = cursor + len(name)
                yield tokenstate(cursor, split, name)
                if value:
                    # followed by '=y'
                    yield tokenstate(split, end, '=' + value)
                else:
                    # 'x=': we don't want to lose the =, it will be marked
                    # as unknown by the matcher
                    yield tokenstate(split, split + 1, '=')
            else:
                # no '=' in the token, or nothing in front of the '='
                yield tokenstate(cursor, end, token)

            cursor = consumed
    except ValueError as e:
        raise errors.ParsingError(str(e), s, stream.tell())
def match(self):
    '''match the tokens of self.s against a man page

    the steps are:

    - tokenize the string using options.tokenize
    - look up the man pages for the first token and use the first one
    - if that man page is a multicommand (e.g. git commit), try to find a
      man page for the first two tokens and use it instead
    - for each remaining token, the first of these that applies:
      - the token is an option of the man page
      - the token looks like a short option (-abc): when it is longer than
        two characters try each option individually, otherwise it is
        unknown
      - the previous option expected an arg: stretch the previous match
        over this token, unless the option lists its possible args and the
        token isn't one of them
      - partialmatch: the man page allows options without a leading '-',
        try to match all characters individually (e.g. tar xzvf)
      - the man page has positional arguments: use the first one
      - the token is unknown

    once all tokens are processed, consecutive unknowns are merged, then
    adjacent matches with the same help text (e.g. -vvv), and every match
    gets the part of self.s that it covers

    returns a list of tuples: (name of the man page used, matches) followed
    by (man page, None) for each of the other man pages that were found
    '''
    logger.info('matching string %r', self.s)

    self.tokens = util.peekable(options.tokenize(self.s))
    self.ts = None

    def attempt_fuzzy(chars):
        # look up the characters of chars one by one, self.pos is moved
        # along while doing so and restored at the end
        dashed = chars[0] == '-'
        if dashed:
            pieces = [chars[0:2]] + list(chars[2:])
        else:
            pieces = list(chars)

        found = []
        savedpos = self.pos
        for piece in pieces:
            if piece[0] == '-':
                candidate = piece
            else:
                candidate = '-' + piece
            opt = self.find_option(candidate)
            if not opt:
                found.append(self.unknown(piece, self.pos + len(piece)))
            elif dashed and not found and opt.expectsarg:
                # only the leading option may swallow the rest as its arg
                logger.info(
                    'option %r expected an arg, taking the rest too', opt)
                return [
                    matchresult(self.pos, self.pos + len(chars), opt.text,
                                None)
                ]
            else:
                found.append(
                    matchresult(self.pos, self.pos + len(piece), opt.text,
                                None))
            self.pos += len(piece)
        self.pos = savedpos
        return found

    self.pos, _, token = self.nexttoken()
    mps = self.findmanpages(token)
    self.manpage = mps[0]

    if self.manpage.multicommand and self.tokens.hasnext():
        try:
            combined = '%s %s' % (token, self.tokens.peek()[2])
            logger.info(
                '%r is a multicommand, trying to get another token and look up %r',
                self.manpage, combined)
            mps = self.findmanpages(combined)
            self.manpage = mps[0]
            # the lookup worked, consume the token we peeked at
            self.nexttoken()
            token = combined
        except errors.ProgramDoesNotExist:
            logger.info('no manpage %r for multicommand %r', combined,
                        self.manpage)

    # the program itself is explained by the synopsis
    matches = [matchresult(0, len(token), self.manpage.synopsis, None)]

    while self.tokens.hasnext():
        self.pos, _, token = self.nexttoken()
        logger.info('trying to match token: %r', token)

        self._prevoption = self._currentoption
        option = self.find_option(token)
        if option:
            logger.info('found an exact match for %r: %r', token, option)
            matches.append(
                matchresult(self.pos, self.ts.endpos, option.text, None))
        elif token != '-' and token.startswith(
                '-') and not token.startswith('--'):
            logger.debug('looks like a short option')
            if len(token) > 2:
                logger.info("trying to split it up")
                matches.extend(attempt_fuzzy(token))
                self.pos += len(token)
            else:
                matches.append(self.unknown(token))
        elif self._prevoption and self._prevoption.expectsarg:
            logger.info(
                "previous option possibly expected an arg, and we can't"
                " find an option to match the current token, assuming it's an arg"
            )
            expected = self._prevoption.expectsarg
            allowed = expected if isinstance(expected, list) else []
            if allowed and token not in allowed:
                logger.info(
                    'token %r not in list of possible args %r for %r',
                    token, allowed, self._prevoption)
                matches.append(self.unknown(token))
            else:
                # stretch the previous match so it covers this token too
                previous = matches[-1]
                matches[-1] = matchresult(previous.start, self.ts.endpos,
                                          previous.text, None)
        elif self.manpage.partialmatch:
            logger.info('attemping to do a partial match')

            partial = attempt_fuzzy(token)
            if any(pm.unknown for pm in partial):
                # all or nothing, a single miss makes the whole token unknown
                logger.info('one of %r was unknown', token)
                matches.append(self.unknown(token))
            else:
                matches += partial
            self.pos += len(token)
        elif self.manpage.arguments:
            arguments = self.manpage.arguments
            first = list(arguments.keys())[0]
            logger.info('got arguments, using %r', first)
            matches.append(
                matchresult(self.pos, self.ts.endpos, arguments[first],
                            None))
        else:
            matches.append(self.unknown(token))

    summary = '\n'.join(
        '%d) %r = %r' % (i, self.s[m.start:m.end], m.text)
        for i, m in enumerate(matches))
    logger.debug('%r matches:\n%s', self.s, summary)

    matches = self._mergeunknowns(matches)
    matches = self._mergeadjacent(matches)

    # add matchresult.match to existing matches
    for i, m in enumerate(matches):
        assert m.end <= len(self.s), '%d %d' % (m.end, len(self.s))
        matches[i] = matchresult(m.start, m.end, m.text,
                                 self.s[m.start:m.end])

    return [(self.manpage.name, matches)] + [(mp, None) for mp in mps[1:]]
def tokenize(s):
    '''split s into tokens with shlex and yield a tokenstate for each piece

    every tokenstate carries the start and end position of the piece in the
    (stripped) string along with its text. shlex doesn't report positions,
    so they are derived from the read pointer of the stream it consumes

    a token of the form 'x=y' is yielded as two pieces, 'x' and '=y'; a
    token of the form 'x=' is yielded as 'x' and '='. anything else
    (including a token that starts with '=') is yielded as is
    '''
    s = s.strip()
    stream = StringIO(s)
    lexer = shlex.shlex(stream, posix=True)
    lexer.whitespace_split = True
    lexer.commenters = ''

    tokens = util.peekable(lexer)
    cursor = 0

    for token in tokens:
        # grab the stream position before checking for another token,
        # peeking ahead moves the underlying string pointer
        consumed = stream.tell()

        # when more tokens follow, back up one char so the separator
        # between the arguments isn't included
        end = consumed - 1 if tokens.hasnext() else consumed

        # cursor sits where the previous token was consumed up to, skip
        # over the whitespace that precedes the current one
        while s[cursor].isspace():
            cursor += 1

        name, equals, value = token.partition('=')
        if equals and name:
            # 'x=..': the name always goes out by itself
            split = cursor + len(name)
            yield tokenstate(cursor, split, name)
            if value:
                # followed by '=y'
                yield tokenstate(split, end, '=' + value)
            else:
                # 'x=': we don't want to lose the =, it will be marked as
                # unknown by the matcher
                yield tokenstate(split, split + 1, '=')
        else:
            # no '=' in the token, or nothing in front of the '='
            yield tokenstate(cursor, end, token)

        cursor = consumed
def match(self):
    '''match the tokens of self.s against a man page

    the steps are:

    - tokenize the string using options.tokenize
    - look up the man pages for the first token and use the first one
    - if that man page is a multicommand (e.g. git commit), try to find a
      man page for the first two tokens and use it instead
    - for each remaining token, the first of these that applies:
      - the token is an option of the man page
      - the token looks like a short option (-abc): when it is longer than
        two characters try each option individually, otherwise it is
        unknown
      - the previous option expected an arg: stretch the previous match
        over this token, unless the option lists its possible args and the
        token isn't one of them
      - partialmatch: the man page allows options without a leading '-',
        try to match all characters individually (e.g. tar xzvf)
      - the man page has positional arguments: use the first one
      - the token is unknown

    once all tokens are processed, consecutive unknowns are merged, then
    adjacent matches with the same help text (e.g. -vvv), and every match
    gets the part of self.s that it covers

    returns a list of tuples: (name of the man page used, matches) followed
    by (man page, None) for each of the other man pages that were found
    '''
    logger.info('matching string %r', self.s)

    self.tokens = util.peekable(options.tokenize(self.s))
    self.ts = None

    def attempt_fuzzy(chars):
        # look up the characters of chars one by one, self.pos is moved
        # along while doing so and restored at the end
        dashed = chars[0] == '-'
        if dashed:
            pieces = [chars[0:2]] + list(chars[2:])
        else:
            pieces = list(chars)

        found = []
        savedpos = self.pos
        for piece in pieces:
            if piece[0] == '-':
                candidate = piece
            else:
                candidate = '-' + piece
            opt = self.find_option(candidate)
            if not opt:
                found.append(self.unknown(piece, self.pos+len(piece)))
            elif dashed and not found and opt.expectsarg:
                # only the leading option may swallow the rest as its arg
                logger.info('option %r expected an arg, taking the rest too', opt)
                return [matchresult(self.pos, self.pos+len(chars), opt.text, None)]
            else:
                found.append(matchresult(self.pos, self.pos+len(piece), opt.text, None))
            self.pos += len(piece)
        self.pos = savedpos
        return found

    self.pos, _, token = self.nexttoken()
    mps = self.findmanpages(token)
    self.manpage = mps[0]

    if self.manpage.multicommand and self.tokens.hasnext():
        try:
            combined = '%s %s' % (token, self.tokens.peek()[2])
            logger.info('%r is a multicommand, trying to get another token and look up %r',
                        self.manpage, combined)
            mps = self.findmanpages(combined)
            self.manpage = mps[0]
            # the lookup worked, consume the token we peeked at
            self.nexttoken()
            token = combined
        except errors.ProgramDoesNotExist:
            logger.info('no manpage %r for multicommand %r', combined, self.manpage)

    # the program itself is explained by the synopsis
    matches = [matchresult(0, len(token), self.manpage.synopsis, None)]

    while self.tokens.hasnext():
        self.pos, _, token = self.nexttoken()
        logger.info('trying to match token: %r', token)

        self._prevoption = self._currentoption
        option = self.find_option(token)
        if option:
            logger.info('found an exact match for %r: %r', token, option)
            matches.append(matchresult(self.pos, self.ts.endpos, option.text, None))
        elif token != '-' and token.startswith('-') and not token.startswith('--'):
            logger.debug('looks like a short option')
            if len(token) > 2:
                logger.info("trying to split it up")
                matches.extend(attempt_fuzzy(token))
                self.pos += len(token)
            else:
                matches.append(self.unknown(token))
        elif self._prevoption and self._prevoption.expectsarg:
            logger.info("previous option possibly expected an arg, and we can't"
                        " find an option to match the current token, assuming it's an arg")
            expected = self._prevoption.expectsarg
            allowed = expected if isinstance(expected, list) else []
            if allowed and token not in allowed:
                logger.info('token %r not in list of possible args %r for %r',
                            token, allowed, self._prevoption)
                matches.append(self.unknown(token))
            else:
                # stretch the previous match so it covers this token too
                previous = matches[-1]
                matches[-1] = matchresult(previous.start, self.ts.endpos, previous.text, None)
        elif self.manpage.partialmatch:
            logger.info('attemping to do a partial match')

            partial = attempt_fuzzy(token)
            if any(pm.unknown for pm in partial):
                # all or nothing, a single miss makes the whole token unknown
                logger.info('one of %r was unknown', token)
                matches.append(self.unknown(token))
            else:
                matches += partial
            self.pos += len(token)
        elif self.manpage.arguments:
            arguments = self.manpage.arguments
            first = list(arguments.keys())[0]
            logger.info('got arguments, using %r', first)
            matches.append(matchresult(self.pos, self.ts.endpos, arguments[first], None))
        else:
            matches.append(self.unknown(token))

    summary = '\n'.join('%d) %r = %r' % (i, self.s[m.start:m.end], m.text)
                        for i, m in enumerate(matches))
    logger.debug('%r matches:\n%s', self.s, summary)

    matches = self._mergeunknowns(matches)
    matches = self._mergeadjacent(matches)

    # add matchresult.match to existing matches
    for i, m in enumerate(matches):
        assert m.end <= len(self.s), '%d %d' % (m.end, len(self.s))
        matches[i] = matchresult(m.start, m.end, m.text, self.s[m.start:m.end])

    return [(self.manpage.name, matches)] + [(mp, None) for mp in mps[1:]]