def _parsetext(lines):
    """Group manpage text lines into paragraphs, yielding a store.paragraph for each.

    Every line is first rewritten: matches of the module-level `_href` pattern
    become links to manpages.ubuntu.com, then each (pattern, replacement) pair
    in `_replacements` is applied in order.

    A line that starts with '<b>' sets the current section (extracted with
    `_section`). A line that only starts with '<b>' after stripping also sets
    the section when `bold` reports nothing outside the bold markup and the
    last bold piece ends with ':' (the colon is dropped from the section name).
    Section lines never become paragraph text, and they do not end the
    paragraph being accumulated; only a blank line or the end of input does.

    Yields:
        store.paragraph(index, text, section, False), where index counts up
        from 0 and text is the paragraph's lines joined with newlines.

    Raises:
        ValueError: if a line is changed by a utf8 decode (ignoring errors)
            followed by a utf8 encode, i.e. it contains invalid utf8.
    """
    paragraphlines = []
    section = None
    i = 0
    for l in lines:
        l = re.sub(_href, r'<a href="http://manpages.ubuntu.com/manpages/precise/en/man\2/\1.\2.html">', l)
        for lookfor, replacewith in _replacements:
            l = re.sub(lookfor, replacewith, l)

        # confirm the line is valid utf8
        # NOTE(review): the decode/encode round trip assumes `l` is a byte
        # string (Python 2 `str`) -- confirm before running on text strings.
        if l.decode('utf8', 'ignore').encode('utf8') != l:
            logger.error('line %r contains invalid utf8', l)
            raise ValueError('line %r contains invalid utf8' % (l,))

        if l.startswith('<b>'):
            # unindented bold line: a section header
            section = re.sub(_section, r'\1', l)
            continue

        stripped = l.strip()
        if stripped.startswith('<b>'):
            inside, outside = bold(stripped)
            if not outside and inside[-1][-1] == ':':
                # indented, fully bold line ending with ':' also names a section
                section = ' '.join(inside)[:-1]
                continue

        if stripped:
            paragraphlines.append(l)
        elif paragraphlines:
            # a blank line closes the paragraph collected so far
            yield store.paragraph(i, '\n'.join(paragraphlines), section, False)
            i += 1
            paragraphlines = []

    # flush whatever is left when the input does not end with a blank line
    if paragraphlines:
        yield store.paragraph(i, '\n'.join(paragraphlines), section, False)
def test_sections(self):
    """Check that _parsetext assigns each paragraph to the section above it.

    The input has four sections with text and one section with no text;
    five paragraphs are expected, none of them for the empty section.
    """
    # Line breaks and blank lines in this literal are significant: the
    # expected paragraphs below depend on where the blank lines fall.
    m = '''<b>SECTION</b>
a
b

c

<b>SECTION2</b>
a

<b>WITH SPACES</b>
a

<b>EMPTY SECTION SHOULD BE IGNORED</b>

<b>SECTION3</b>

tNOTASECTION'''

    parsed = list(manpage._parsetext(m.splitlines()))
    # assertEqual (not assertTrue on a comparison) so a failure shows the length
    self.assertEqual(len(parsed), 5)
    self.assertEqual(parsed, [store.paragraph(0, 'a\nb', 'SECTION', False),
                              store.paragraph(1, 'c', 'SECTION', False),
                              store.paragraph(2, 'a', 'SECTION2', False),
                              store.paragraph(3, 'a', 'WITH SPACES', False),
                              store.paragraph(4, 'tNOTASECTION', 'SECTION3', False)])
def _parsetext(lines):
    """Yield store.paragraph objects built from the given manpage text lines.

    Per line, in order:
      1. `_href` matches are replaced with a link to manpages.ubuntu.com and
         every (pattern, replacement) pair of `_replacements` is applied.
      2. The line is checked for invalid utf8; a bad line is logged and
         aborts parsing with ValueError.
      3. A line starting with '<b>' updates the current section via
         `_section`. Otherwise, a line whose stripped form starts with '<b>',
         has nothing outside the bold markup according to `bold`, and whose
         last bold piece ends in ':' updates the section too (without the
         trailing colon).
      4. Any other non-blank line is added to the current paragraph, and a
         blank line emits the current paragraph if there is one.

    Paragraphs are numbered from 0 in the order they are yielded and carry
    the section that is current when they are emitted; the last argument to
    store.paragraph is always False.

    Raises:
        ValueError: when a line differs from its utf8 decode(ignore)/encode
            round trip.
    """
    paragraphlines = []
    section = None
    i = 0

    for l in lines:
        l = re.sub(_href, r'<a href="http://manpages.ubuntu.com/manpages/precise/en/man\2/\1.\2.html">', l)
        for lookfor, replacewith in _replacements:
            l = re.sub(lookfor, replacewith, l)

        # Dropping undecodable bytes must leave the line unchanged, otherwise
        # the line held invalid utf8.
        # NOTE(review): presumably `l` is a Python 2 byte string here -- verify.
        lreplaced = l.decode('utf8', 'ignore').encode('utf8')
        if lreplaced != l:
            logger.error('line %r contains invalid utf8', l)
            raise ValueError('line %r contains invalid utf8' % (l,))

        content = l.strip()
        if l.startswith('<b>'):
            section = re.sub(_section, r'\1', l)
        else:
            foundsection = False
            if content.startswith('<b>'):
                inside, outside = bold(content)
                if not outside and inside[-1][-1] == ':':
                    foundsection = True
                    section = ' '.join(inside)[:-1]
            if foundsection:
                continue
            if not content:
                if paragraphlines:
                    yield store.paragraph(i, '\n'.join(paragraphlines), section, False)
                    i += 1
                    paragraphlines = []
            else:
                paragraphlines.append(l)

    # input may end without a trailing blank line
    if paragraphlines:
        yield store.paragraph(i, '\n'.join(paragraphlines), section, False)
def test_paragraphjoiner(self):
    """Check which paragraphs paragraphjoiner._join folds into each option.

    Builds 26 single-letter paragraphs ('a'..'z') and options on seven of
    them, joins, and checks each option's resulting text. A second join on
    the same data must leave both lists unchanged.
    """
    maxdistance = fixer.paragraphjoiner.maxdistance

    paragraphs = [store.paragraph(i, chr(ord('a') + i), None, False) for i in range(26)]
    options = [
            store.option(paragraphs[0], [], [], False),
            store.option(paragraphs[1], [], [], False),
            store.option(paragraphs[5], [], [], False),
            store.option(paragraphs[5+maxdistance-1], [], [], False),
            store.option(paragraphs[15], [], [], False),
            store.option(paragraphs[17], [], [], False),
            store.option(paragraphs[-1], [], [], False)]

    f = fixer.paragraphjoiner(None)
    # the return value of _join is not checked by this test
    f._join(paragraphs, options)

    self.assertEqual(options[0].text, 'a')
    self.assertEqual(options[1].text.replace('\n', ''), 'bcde')
    self.assertEqual(options[2].text.replace('\n', ''), 'fghi')
    self.assertEqual(options[3].text, 'j')
    self.assertEqual(options[4].text.replace('\n', ''), 'pq')
    self.assertEqual(options[5].text, 'r')
    self.assertEqual(options[6].text, 'z')

    # join again to make sure nothing is changed
    oldparagraphs = copy.deepcopy(paragraphs)
    oldoptions = copy.deepcopy(options)
    f._join(paragraphs, options)
    self.assertEqual(oldparagraphs, paragraphs)
    self.assertEqual(oldoptions, options)
def test_paragraphjoiner(self):
    """Exercise paragraphjoiner._join on 26 one-letter paragraphs.

    Options are placed on seven of the paragraphs; after joining, each
    option's text is compared with the expected letters, and a repeated
    join is required to change nothing.
    """
    maxdistance = fixer.paragraphjoiner.maxdistance

    paragraphs = []
    for i in range(26):
        paragraphs.append(store.paragraph(i, chr(ord('a') + i), None, False))

    # indices of the paragraphs that are turned into options
    anchors = [0, 1, 5, 5 + maxdistance - 1, 15, 17, -1]
    options = [store.option(paragraphs[k], [], [], False) for k in anchors]

    joiner = fixer.paragraphjoiner(None)
    joiner._join(paragraphs, options)

    # (option index, ignore newlines when comparing?, expected text)
    expectations = [
        (0, False, 'a'),
        (1, True, 'bcde'),
        (2, True, 'fghi'),
        (3, False, 'j'),
        (4, True, 'pq'),
        (5, False, 'r'),
        (6, False, 'z'),
    ]
    for position, ignorenewlines, expected in expectations:
        actual = options[position].text
        if ignorenewlines:
            actual = actual.replace('\n', '')
        self.assertEqual(actual, expected)

    # a second pass over already-joined data must be a no-op
    paragraphsbefore = copy.deepcopy(paragraphs)
    optionsbefore = copy.deepcopy(options)
    joiner._join(paragraphs, options)
    self.assertEqual(paragraphsbefore, paragraphs)
    self.assertEqual(optionsbefore, options)
def test_extract(self):
    """Check options.extract on a manpage with two option paragraphs.

    Three paragraphs are supplied; the test expects exactly two options,
    built from the first and third paragraphs, each with a single long
    name, no short names, and expectsarg set to True.
    """
    marked_up = store.paragraph(0, '<b>--test</b>=<u>arg</u>\ndesc', '', True)
    not_an_option = store.paragraph(1, 'no options here', '', True)
    plain = store.paragraph(2, '--foo-bar=<arg>\ndesc', '', True)

    page = store.manpage('', '', '', [marked_up, not_an_option, plain], [])
    options.extract(page)
    extracted = page.options

    self.assertEqual(len(extracted), 2)

    # (source paragraph, expected long names), in the order options appear
    expected = [(marked_up, ['--test']), (plain, ['--foo-bar'])]
    for option, (paragraph, longnames) in zip(extracted, expected):
        self.assertEqual(option.text, paragraph.text)
        self.assertEqual(option.short, [])
        self.assertEqual(option.long, longnames)
        self.assertEqual(option.expectsarg, True)
def test_extract(self):
    """Check that options.extract finds the two option paragraphs of a manpage.

    p1 (with <b>/<u> markup) and p3 (plain text) are expected to become
    options; p2 is expected to be left out.
    """
    p1 = store.paragraph(0, '<b>--test</b>=<u>arg</u>\ndesc', '', True)
    p2 = store.paragraph(1, 'no options here', '', True)
    p3 = store.paragraph(2, '--foo-bar=<arg>\ndesc', '', True)

    m = store.manpage('', '', '', [p1, p2, p3], [])
    options.extract(m)
    r = m.options

    # assertEqual rather than the deprecated assertEquals alias
    self.assertEqual(len(r), 2)

    self.assertEqual(r[0].text, p1.text)
    self.assertEqual(r[0].short, [])
    self.assertEqual(r[0].long, ['--test'])
    self.assertEqual(r[0].expectsarg, True)

    self.assertEqual(r[1].text, p3.text)
    self.assertEqual(r[1].short, [])
    self.assertEqual(r[1].long, ['--foo-bar'])
    self.assertEqual(r[1].expectsarg, True)
def tag(source):
    """Render the tagger page for a manpage, or save submitted paragraph tags.

    The manpage is the first result of the store's findmanpage(source).

    When the request form has a 'paragraphs' field, it is parsed as a JSON
    list of dicts (keys read: idx, text, section, short, long, expectsarg,
    nestedcommand, argument, is_option). Each dict becomes a store.paragraph,
    upgraded to a store.option when it is flagged as an option and has at
    least one short name, long name or argument. The manpage's nestedcommand
    flag is set from the form's 'nestedcommand' field, the manpage is saved
    through the manager's edit(), and the client is redirected to the
    'explain' endpoint. A falsy result from edit(), or a nestedcommand value
    of True on a paragraph, aborts with 503.

    Without a 'paragraphs' field, list-valued expectsarg/nestedcommand
    attributes of the manpage's options are flattened to comma separated
    strings and 'tagger.html' is rendered.
    """
    mngr = manager.manager(config.MONGO_URI, 'explainshell', [], False, False)
    m = mngr.store.findmanpage(source)[0]
    assert m

    if 'paragraphs' in request.form:
        paragraphs = json.loads(request.form['paragraphs'])
        mparagraphs = []
        for d in paragraphs:
            idx = d['idx']
            text = d['text']
            section = d['section']
            short_opts = [name.strip() for name in d['short']]
            # Named long_opts rather than `long`: the previous code referred
            # to the builtin `int` here, which made the option check always
            # true and passed the type `int` as the long-options list.
            long_opts = [name.strip() for name in d['long']]
            expectsarg = _convertvalue(d['expectsarg'])
            nestedcommand = _convertvalue(d['nestedcommand'])
            if isinstance(nestedcommand, str):
                nestedcommand = [nestedcommand]
            elif nestedcommand is True:
                logger.error('nestedcommand %r must be a string or list', nestedcommand)
                abort(503)
            argument = d['argument']
            if not argument:
                argument = None

            p = store.paragraph(idx, text, section, d['is_option'])
            # only paragraphs that name something are promoted to options
            if d['is_option'] and (short_opts or long_opts or argument):
                p = store.option(p, short_opts, long_opts, expectsarg, argument, nestedcommand)
            mparagraphs.append(p)

        m.nestedcommand = request.form.get('nestedcommand', '').lower() == 'true'
        m = mngr.edit(m, mparagraphs)
        if m:
            return redirect(url_for('explain', cmd=m.name))
        else:
            abort(503)
    else:
        helpers.convertparagraphs(m)
        for p in m.paragraphs:
            if isinstance(p, store.option):
                # the template gets lists as comma separated strings
                if isinstance(p.expectsarg, list):
                    p.expectsarg = ', '.join(p.expectsarg)
                if isinstance(p.nestedcommand, list):
                    p.nestedcommand = ', '.join(p.nestedcommand)

        return render_template('tagger.html', m=m)
def tag(source):
    """Serve the tagger page for `source`, or store the tags submitted for it.

    If the request form carries 'paragraphs' (a JSON list of dicts), every
    entry is turned into a store.paragraph -- or a store.option when it is
    marked as an option and has a short name, a long name or an argument --
    and the manpage is saved via the manager's edit(). Success redirects to
    the 'explain' endpoint; a falsy edit() result, or a paragraph whose
    nestedcommand converts to True, aborts with 503.

    Otherwise the manpage's paragraphs are converted with
    helpers.convertparagraphs, list-valued expectsarg/nestedcommand option
    attributes are joined with ', ', and 'tagger.html' is rendered.
    """
    mngr = manager.manager(config.MONGO_URI, 'explainshell', [], False, False)
    m = mngr.store.findmanpage(source)[0]
    assert m

    if 'paragraphs' not in request.form:
        # display path: prepare the manpage for the template
        helpers.convertparagraphs(m)
        for p in m.paragraphs:
            if not isinstance(p, store.option):
                continue
            if isinstance(p.expectsarg, list):
                p.expectsarg = ', '.join(p.expectsarg)
            if isinstance(p.nestedcommand, list):
                p.nestedcommand = ', '.join(p.nestedcommand)
        return render_template('tagger.html', m=m)

    # save path: rebuild the paragraph list from the submitted JSON
    mparagraphs = []
    for entry in json.loads(request.form['paragraphs']):
        idx = entry['idx']
        text = entry['text']
        section = entry['section']
        shortnames = [name.strip() for name in entry['short']]
        longnames = [name.strip() for name in entry['long']]
        expectsarg = _convertvalue(entry['expectsarg'])

        nestedcommand = _convertvalue(entry['nestedcommand'])
        if isinstance(nestedcommand, str):
            nestedcommand = [nestedcommand]
        elif nestedcommand is True:
            logger.error('nestedcommand %r must be a string or list', nestedcommand)
            abort(503)

        # empty/falsy arguments are normalized to None
        argument = entry['argument'] or None

        paragraph = store.paragraph(idx, text, section, entry['is_option'])
        if entry['is_option'] and (shortnames or longnames or argument):
            paragraph = store.option(paragraph, shortnames, longnames,
                                     expectsarg, argument, nestedcommand)
        mparagraphs.append(paragraph)

    m.nestedcommand = request.form.get('nestedcommand', '').lower() == 'true'

    edited = mngr.edit(m, mparagraphs)
    if edited:
        return redirect(url_for('explain', cmd=edited.name))
    abort(503)
def tag(source):
    """Serve the tagger page for `source`, or store the tags submitted for it.

    If the request form carries 'paragraphs' (a JSON list of dicts), every
    entry is turned into a store.paragraph -- or a store.option when it is
    marked as an option and has a short name, a long name or an argument --
    and the manpage is saved via the manager's edit(). Success redirects to
    the 'explain' endpoint; a falsy edit() result aborts with 503.

    An entry's 'expectsarg' is stored as a list of stripped strings when it
    is a list, as True when it is the string 'true' (any case), as the
    string itself when non-empty, and as False otherwise.

    Without 'paragraphs' in the form, list-valued expectsarg attributes of
    the manpage's options are joined with ', ' and 'tagger.html' is rendered.
    """
    mngr = manager.manager(config.MONGO_URI, 'explainshell', [], False, False)
    m = mngr.store.findmanpage(source)[0]
    assert m

    if 'paragraphs' not in request.form:
        # display path: prepare the manpage for the template
        helpers.convertparagraphs(m)
        for p in m.paragraphs:
            if not isinstance(p, store.option):
                continue
            if isinstance(p.expectsarg, list):
                p.expectsarg = ', '.join(p.expectsarg)
        return render_template('tagger.html', m=m)

    # save path: rebuild the paragraph list from the submitted JSON
    mparagraphs = []
    for entry in json.loads(request.form['paragraphs']):
        idx = entry['idx']
        text = entry['text']
        section = entry['section']
        shortnames = [name.strip() for name in entry['short']]
        longnames = [name.strip() for name in entry['long']]

        rawexpectsarg = entry['expectsarg']
        if isinstance(rawexpectsarg, list):
            expectsarg = [value.strip() for value in rawexpectsarg]
        elif rawexpectsarg.lower() == 'true':
            expectsarg = True
        else:
            # non-empty strings are kept as they are, empty ones mean False
            expectsarg = rawexpectsarg if rawexpectsarg else False

        # empty/falsy arguments are normalized to None
        argument = entry['argument'] or None

        paragraph = store.paragraph(idx, text, section, entry['is_option'])
        if entry['is_option'] and (shortnames or longnames or argument):
            paragraph = store.option(paragraph, shortnames, longnames, expectsarg, argument)
        mparagraphs.append(paragraph)

    edited = mngr.edit(m, mparagraphs)
    if edited:
        return redirect(url_for('explain', program=edited.name))
    abort(503)
def test_first_paragraph_no_section(self):
    """Check that text appearing before any section header gets section None."""
    m = 'foo\nbar'
    parsed = list(manpage._parsetext(m.splitlines()))
    # assertEqual rather than the deprecated assertEquals alias
    self.assertEqual(parsed, [store.paragraph(0, 'foo\nbar', None, False)])