def replace_markerless(self, stack, node, depth):
    """Give every MARKERLESS paragraph a unique 'p<N>' label index.

    If the paragraph has a keyterm, the index is derived from the keyterm
    (via keyterm_to_int); otherwise it is one more than the count of
    markerless siblings already present at this depth.
    """
    if node.label[-1] != mtypes.MARKERLESS:
        return
    keyterm = KeyTerms.get_keyterm(node, ignore_definitions=False)
    if keyterm:
        index = keyterm_to_int(keyterm)
    else:
        # Keyterm-derived labels are 'p' + a large integer (>10000), so
        # any label shorter than 6 characters cannot be keyterm-derived;
        # count only the plain markerless siblings.
        siblings = stack.peek_level(depth)
        index = 1 + sum(
            1 for sib in siblings
            if sib.is_markerless() and len(sib.label[-1]) < 6)
    node.label[-1] = 'p{}'.format(index)
# NOTE(review): the statements above `elif` are the tail of a function whose
# `def` header is outside this view; the indentation below is reconstructed
# from the control-flow keywords — confirm against the original source.
        par_list[1] = match.section
    elif match.appendix:
        par_list[1] = "Appendix:" + match.appendix
    # Set paragraph depths
    for p in match_list[2:]:
        par_list[match_list.index(p)] = p
    par = tokens.Paragraph(par_list)
    return [par]


# Grammar: keyterm(Quoted Text) — the quoted text is converted to a
# canonical paragraph marker of the form 'p<N>' via keyterm_to_int.
_keyterm_label_part = (
    Suppress(Marker("keyterm"))
    + QuotedString(quoteChar='(', endQuoteChar=')')
).setParseAction(lambda m: "p{}".format(keyterm_to_int(m[0])))

# Grammar: a plain alphanumeric label component (e.g. 'a', 'B', '2').
_simple_label_part = Word(string.ascii_lowercase + string.ascii_uppercase
                          + string.digits)

# A label component is either a keyterm reference or a plain marker.
_label_part = _keyterm_label_part | _simple_label_part

# Grammar: [label:PART-SECTION(-COMPONENT)*] — an explicit label override;
# the parsed pieces are handed to tokenize_override_ps to build the token.
override_label = (
    Suppress("[") + Marker("label") + Suppress(":")
    + atomic.part + Suppress("-")
    + (atomic.section | atomic.appendix)
    + ZeroOrMore(Suppress("-") + _label_part)
    + Suppress("]")
).setParseAction(tokenize_override_ps)

# Looks like: [subject-group(Some text Goes Here)]
# NOTE(review): this example comment appears to describe a grammar rule
# defined after this chunk, not override_label above — verify placement.
def test_keyterm_to_int(self):
    """keyterm_to_int should standardize the keyterm"""
    # Case, punctuation, and internal spacing differences collapse to the
    # same canonical integer.
    canonical = keyterm_to_int(' abc123 mOrE')
    self.assertEqual(keyterm_to_int('Abc 123 More.'), canonical)
    # The resulting index is always a large integer.
    self.assertTrue(keyterm_to_int('a term') > 10000)