Python marker_ofの例、regparser.layer.paragraph_markers.marker_of Pythonの例

コード例 #1

0

ファイルを表示

    def keyterm_in_node(cls, node, ignore_definitions=True):
        """Return the keyterm found in the node's tagged text, if any.

        The paragraph marker reported by ``marker_of`` is removed (first
        occurrence only) from ``node.tagged_text`` before searching for a
        keyterm. When ``ignore_definitions`` is true, a keyterm that
        ``cls.is_definition`` accepts is discarded. ``None`` is returned when
        no keyterm qualifies.
        """
        without_marker = node.tagged_text.replace(marker_of(node), '', 1)
        found = keyterm_in_text(without_marker.strip())

        if not found:
            return None
        if ignore_definitions and cls.is_definition(node, found):
            return None
        return found

コード例 #2

0

ファイルを表示

    def process_node_text(node):
        """Return the node's tagged text without its marker.

        Only the first occurrence of the marker is removed; leading and
        trailing whitespace is stripped from the result.
        """
        leading_marker = marker_of(node)
        without_marker = node.tagged_text.replace(leading_marker, '', 1)
        return without_marker.strip()

コード例 #3

0

ファイルを表示

def create_add_amendment(amendment):
    """Split an amendment's node tree into one change per node.

    The tree under ``amendment['node']`` is flattened and every node is
    converted with ``format_node``, so that each resulting change acts on a
    single node. Changes whose action is ``'PUT'`` are then adjusted when
    stars are present:

    * text that is only ``* * *`` (after the paragraph marker) switches the
      change's action to ``Verb.KEEP``;
    * for the root node, text ending in a colon whose source XML is followed
      by a ``STARS`` element restricts the change to the ``[text]`` field.

    Returns the list of change dicts.
    """

    nodes_list = []
    flatten_tree(nodes_list, amendment['node'])
    changes = [format_node(n, amendment) for n in nodes_list]

    # dict.keys()/dict.values() return views on Python 3 and cannot be
    # indexed, so materialise them first. Each change is treated as a
    # single-entry {label: payload} mapping, exactly as the previous
    # positional indexing assumed.
    puts = [c for c in changes if list(c.values())[0]['action'] == 'PUT']
    for change in puts:
        label = list(change.keys())[0]
        node = struct.find(amendment['node'], label)
        text = node.text.strip()
        marker = marker_of(node)
        text = text[len(marker):].strip()
        # Text is stars, but this is not the root. Explicitly try to keep
        # this node
        if text == '* * *':
            change[label]['action'] = Verb.KEEP

        # If text ends with a colon and is followed by stars, assume we are
        # only modifying the intro text
        if (text[-1:] == ':' and node.label == amendment['node'].label
                and node.source_xml is not None):
            following = node.source_xml.getnext()
            if following is not None and following.tag == 'STARS':
                change[label]['field'] = '[text]'

    return changes

コード例 #4

0

ファイルを表示

ファイル: changes.py プロジェクト: jmcarp/regulations-parser

def create_add_amendment(amendment):
    """Split an amendment's node tree into one change per node.

    The tree under ``amendment['node']`` is flattened and every node is
    converted with ``format_node``, so that each resulting change acts on a
    single node. Changes whose action is ``'PUT'`` are then adjusted when
    stars are present:

    * text that is only ``* * *`` (after the paragraph marker) switches the
      change's action to ``'KEEP'``;
    * for the root node, text ending in a colon whose source XML is followed
      by a ``STARS`` element restricts the change to the ``[text]`` field.

    Returns the list of change dicts.
    """

    nodes_list = []
    flatten_tree(nodes_list, amendment['node'])
    changes = [format_node(n, amendment) for n in nodes_list]

    # dict.keys()/dict.values() return views on Python 3 and cannot be
    # indexed, so materialise them first. Each change is treated as a
    # single-entry {label: payload} mapping, exactly as the previous
    # positional indexing assumed.
    puts = [c for c in changes if list(c.values())[0]['action'] == 'PUT']
    for change in puts:
        label = list(change.keys())[0]
        node = struct.find(amendment['node'], label)
        text = node.text.strip()
        marker = marker_of(node)
        text = text[len(marker):].strip()
        # Text is stars, but this is not the root. Explicitly try to keep
        # this node
        if text == '* * *':
            change[label]['action'] = 'KEEP'

        # If text ends with a colon and is followed by stars, assume we are
        # only modifying the intro text
        if (text[-1:] == ':' and node.label == amendment['node'].label
                and node.source_xml is not None):
            following = node.source_xml.getnext()
            if following is not None and following.tag == 'STARS':
                change[label]['field'] = '[text]'

    return changes

コード例 #5

0

ファイルを表示

ファイル: changes.py プロジェクト: jposi/regulations-parser

def create_add_amendment(amendment):
    """Flatten an amendment's tree into per-node changes.

    Every node under ``amendment['node']`` is converted with ``format_node``.
    The change belonging to the root node is then adjusted when stars are
    present: text that is only ``* * *`` limits the change to
    ``[children]``, while text ending in a colon that is followed by a
    ``STARS`` element in the source XML limits it to ``[text]``.

    Returns the list of formatted nodes.
    """
    root = amendment['node']

    flattened = []
    flatten_tree(flattened, root)
    nodes = [format_node(n, amendment) for n in flattened]

    root_label = root.label_id()
    candidates = [entry for entry in nodes if root_label in entry]
    root_change = candidates[0][root_label]

    # Drop the leading paragraph marker before inspecting the text.
    body = root.text.strip()
    body = body[len(marker_of(root)):].strip()

    # Only stars, yet marked as edited: treat it as a change to the child
    # paragraphs alone.
    if body == '* * *':
        root_change['field'] = '[children]'

    # Intro text ending in a colon and followed by stars: treat it as a
    # change to the intro text alone.
    if body.endswith(':') and root.source_xml is not None:
        following = root.source_xml.getnext()
        if following is not None and following.tag == 'STARS':
            root_change['field'] = '[text]'

    return nodes

コード例 #6

0

ファイルを表示

ファイル: key_terms.py プロジェクト: eregs/regulations-parser

    def keyterm_in_node(cls, node, ignore_definitions=True):
        """Look for a keyterm in the node's tagged text.

        The node's marker is removed once from ``node.tagged_text`` and the
        remainder is handed to ``keyterm_in_text``. A keyterm is returned
        unless ``ignore_definitions`` is set and ``cls.is_definition``
        accepts it; otherwise the result is ``None``.
        """
        stripped = node.tagged_text.replace(marker_of(node), '', 1).strip()
        found = keyterm_in_text(stripped)

        if found:
            skip = ignore_definitions and cls.is_definition(node, found)
            if not skip:
                return found
        return None

コード例 #7

0

ファイルを表示

ファイル: key_terms.py プロジェクト: anthonygarvan/regulations-parser

    def keyterm_in_node(cls, node, ignore_definitions=True):
        """Look for a keyterm in the node's tagged text.

        A missing or falsy ``tagged_text`` attribute is treated as an empty
        string. The node's marker is removed once before the text is handed
        to ``keyterm_in_text``. A keyterm is returned unless
        ``ignore_definitions`` is set and ``cls.is_definition`` accepts it;
        otherwise the result is ``None``.
        """
        raw = getattr(node, 'tagged_text', None)
        if not raw:
            raw = ''
        found = keyterm_in_text(raw.replace(marker_of(node), '', 1).strip())

        if not found:
            return None
        if ignore_definitions and cls.is_definition(node, found):
            return None
        return found

コード例 #8

0

ファイルを表示

 def test_marker_of_range(self):
     """In addition to single paragraph markers, marker ranges must be
     recognised. This has been seen with "Reserved" paragraphs, but other
     scenarios are likely"""
     # (expected marker, paragraph text) pairs
     cases = [
         ('(b) - (d)', '(b) - (d) Reserved'),
         ('(b)-(d)', '(b)-(d) Some Words'),
         ('b. - d.', 'b. - d. Can be ignored'),
         ('b.-d.', 'b.-d. Has no negative numbers'),
         ('(b)', '(b) -1.0 is negative'),
     ]
     for expected, text in cases:
         node = Node(text=text, label=['b'])
         self.assertEqual(expected, marker_of(node))

コード例 #9

0

ファイルを表示

ファイル: layer_paragraph_markers_tests.py プロジェクト: eregs/regulations-parser

 def test_marker_of_range(self):
     """In addition to single paragraph markers, several markers may appear
     together as a range. This was encountered with "Reserved" paragraphs,
     though other scenarios are likely"""
     expected_by_text = (
         ('(b) - (d) Reserved', '(b) - (d)'),
         ('(b)-(d) Some Words', '(b)-(d)'),
         ('b. - d. Can be ignored', 'b. - d.'),
         ('b.-d. Has no negative numbers', 'b.-d.'),
         # A negative number after the marker is not part of a range
         ('(b) -1.0 is negative', '(b)'),
     )
     for text, marker in expected_by_text:
         found = marker_of(Node(text=text, label=['b']))
         self.assertEqual(marker, found)

コード例 #10

0

ファイルを表示

ファイル: compiler.py プロジェクト: kaitlin/regulations-parser

def overwrite_marker(origin, new_label):
    """Rewrite the marker at the start of a node's text for a new label.

    The first occurrence of the node's current marker in ``origin.text`` is
    replaced by ``(new_label)`` when the current marker contains a
    parenthesis, or by ``new_label.`` otherwise. If the node has no marker, a
    warning is logged and the text is left alone. The same node object is
    returned in every case.
    """
    old_marker = marker_of(origin)
    parenthesized = '(' in old_marker

    if not (parenthesized or old_marker):
        logger.warning("Cannot replace marker in %s", origin.text)
        return origin

    template = '({0})' if parenthesized else '{0}.'
    origin.text = origin.text.replace(
        old_marker, template.format(new_label), 1)
    return origin

コード例 #11

0

ファイルを表示

def create_add_amendment(amendment):
    """Turn an amendment's node tree into a flat list of changes.

    Every node under ``amendment['node']`` is converted with ``format_node``;
    only the root node is given the amendment's ``parent_label`` (if any).
    Changes containing a ``'PUT'`` action are then adjusted when stars are
    present: text that is only ``* * *`` switches the action to
    ``Verb.KEEP``, and root text ending in a colon that is followed by a
    ``STARS`` element in the source XML restricts the change to ``[text]``.

    Returns the list of change dicts.
    """
    root = amendment['node']

    def _is_put(change):
        return any(entry['action'] == 'PUT' for entry in change.values())

    flattened = []
    flatten_tree(flattened, root)

    changes = []
    for node in flattened:
        parent_label = None
        if node.label == root.label:  # is root
            parent_label = amendment.get('parent_label')
        changes.append(format_node(node, amendment, parent_label))

    # Select the PUT changes up front, before any action is rewritten below.
    for change in list(filter(_is_put, changes)):
        # A PUT change is never empty and is expected to hold a single key
        label = next(iter(change))
        node = struct.find(root, label)
        body = node.text.strip()[len(marker_of(node)):].strip()

        # Nothing but stars: explicitly try to keep this node
        if body == '* * *':
            change[label]['action'] = Verb.KEEP

        # Root intro text ending with a colon and followed by stars: assume
        # only the intro text is being modified
        if not body.endswith(':'):
            continue
        if node.label == root.label and node.source_xml is not None:
            following = node.source_xml.getnext()
            if following is not None and following.tag == 'STARS':
                change[label]['field'] = '[text]'

    return changes

コード例 #12

0

ファイルを表示

ファイル: changes.py プロジェクト: tadhg-ohiggins/regulations-parser

def create_add_amendment(amendment):
    """Turn an amendment's node tree into a flat list of changes.

    Every node under ``amendment['node']`` is converted with ``format_node``;
    only the root node is given the amendment's ``parent_label`` (if any).
    Changes containing a ``'PUT'`` action are then adjusted when stars are
    present: text that is only ``* * *`` switches the action to
    ``Verb.KEEP``, and root text ending in a colon that is followed by a
    ``STARS`` element in the source XML restricts the change to ``[text]``.

    Returns the list of change dicts.
    """
    root = amendment['node']

    flattened = []
    flatten_tree(flattened, root)

    changes = []
    for node in flattened:
        is_root = node.label == root.label
        parent_label = amendment.get('parent_label') if is_root else None
        changes.append(format_node(node, amendment, parent_label))

    # Collect the PUT changes before the loop rewrites any action.
    puts = []
    for candidate in changes:
        if any(v['action'] == 'PUT' for v in candidate.values()):
            puts.append(candidate)

    for change in puts:
        # A PUT change is never empty and is expected to hold a single key
        label = next(iter(change))
        node = struct.find(root, label)
        stripped = node.text.strip()
        remainder = stripped[len(marker_of(node)):].strip()

        # Nothing but stars: explicitly try to keep this node
        if remainder == '* * *':
            change[label]['action'] = Verb.KEEP

        # Root intro text ending with a colon and followed by stars: assume
        # only the intro text is being modified
        intro_only = (remainder.endswith(':')
                      and node.label == root.label
                      and node.source_xml is not None)
        if intro_only:
            following = node.source_xml.getnext()
            if following is not None and following.tag == 'STARS':
                change[label]['field'] = '[text]'

    return changes