コード例 #1
0
    def keyterm_in_node(cls, node, ignore_definitions=True):
        """Return the keyterm that ``keyterm_in_text`` finds in a node.

        The first occurrence of the node's paragraph marker (as reported by
        ``marker_of``) is removed from the tagged text, and the result is
        stripped, before the keyterm lookup.

        A node with no ``tagged_text`` attribute, or with it set to ``None``,
        is treated as having empty text rather than raising
        ``AttributeError``.

        :param node: node whose tagged text is inspected
        :param ignore_definitions: when true, a keyterm for which
            ``cls.is_definition(node, keyterm)`` is truthy is not returned
        :return: the keyterm, or ``None`` when nothing should be reported
        """
        tagged = getattr(node, 'tagged_text', None) or ''
        tagged = tagged.replace(marker_of(node), '', 1).strip()
        keyterm = keyterm_in_text(tagged)

        if not keyterm:
            return None
        # Only consult is_definition when the caller asked to skip
        # definitions.
        if ignore_definitions and cls.is_definition(node, keyterm):
            return None
        return keyterm
コード例 #2
0
    def process_node_text(node):
        """Return the node's tagged text without its paragraph marker.

        Only the first occurrence of the marker is dropped; surrounding
        whitespace is then stripped from the result.
        """
        prefix = marker_of(node)
        without_marker = node.tagged_text.replace(prefix, '', 1)
        return without_marker.strip()
コード例 #3
0
def create_add_amendment(amendment):
    """Turn an amendment's node tree into a flat list of per-node changes.

    The tree under ``amendment['node']`` is flattened with ``flatten_tree``
    and each resulting node is converted with ``format_node``, so every
    change acts on one node. Changes whose action is ``'PUT'`` are then
    adjusted when the node's text (minus its marker) involves stars.

    :param amendment: mapping with at least a ``'node'`` entry (the root of
        the amended tree); it is also handed to ``format_node``
    :return: list of change dicts as produced by ``format_node``. Each is
        assumed to hold exactly one ``label -> payload`` entry.
    """

    nodes_list = []
    flatten_tree(nodes_list, amendment['node'])
    changes = [format_node(n, amendment) for n in nodes_list]

    for change in changes:
        # dict views cannot be indexed on Python 3, so pull the single
        # entry out through an iterator (works on Python 2 as well).
        label, payload = next(iter(change.items()))
        if payload['action'] != 'PUT':
            continue

        node = struct.find(amendment['node'], label)
        text = node.text.strip()
        marker = marker_of(node)
        text = text[len(marker):].strip()
        # Text is nothing but stars: explicitly try to keep this node
        if text == '* * *':
            payload['action'] = Verb.KEEP

        # If text ends with a colon and is followed by stars, assume we are
        # only modifying the intro text. This applies to the root node only.
        if (text[-1:] == ':' and node.label == amendment['node'].label
                and node.source_xml is not None):
            following = node.source_xml.getnext()
            if following is not None and following.tag == 'STARS':
                payload['field'] = '[text]'

    return changes
コード例 #4
0
ファイル: changes.py プロジェクト: jmcarp/regulations-parser
def create_add_amendment(amendment):
    """Break an amendment's node tree apart into one change per node.

    ``flatten_tree`` collects the nodes under ``amendment['node']`` and
    ``format_node`` converts each of them, which ensures that each change
    only acts on one node. For changes whose action is ``'PUT'`` the action
    or field is then adjusted when stars are present in the text.

    :param amendment: mapping with at least a ``'node'`` entry (the root of
        the amended tree); it is also handed to ``format_node``
    :return: list of change dicts as produced by ``format_node``. Each is
        assumed to hold exactly one ``label -> payload`` entry.
    """

    nodes_list = []
    flatten_tree(nodes_list, amendment['node'])
    changes = [format_node(n, amendment) for n in nodes_list]

    for change in changes:
        # Indexing ``.values()`` / ``.keys()`` fails on Python 3 because they
        # are views; reading the single entry via an iterator works on both
        # Python 2 and Python 3.
        label, payload = next(iter(change.items()))
        if payload['action'] != 'PUT':
            continue

        node = struct.find(amendment['node'], label)
        text = node.text.strip()
        marker = marker_of(node)
        text = text[len(marker):].strip()
        # Text is nothing but stars: explicitly try to keep this node
        if text == '* * *':
            payload['action'] = 'KEEP'

        # If text ends with a colon and is followed by stars, assume we are
        # only modifying the intro text. This applies to the root node only.
        if (text[-1:] == ':' and node.label == amendment['node'].label
                and node.source_xml is not None):
            following = node.source_xml.getnext()
            if following is not None and following.tag == 'STARS':
                payload['field'] = '[text]'

    return changes
コード例 #5
0
ファイル: changes.py プロジェクト: jposi/regulations-parser
def create_add_amendment(amendment):
    """Flatten an amendment's tree into per-node changes.

    Every node under ``amendment['node']`` is gathered by ``flatten_tree``
    and converted with ``format_node`` so that each entry acts on a single
    node. The entry for the root node then has its ``'field'`` adjusted when
    stars are present in, or directly after, the root's text.
    """
    root = amendment['node']
    flattened = []
    flatten_tree(flattened, root)
    formatted = [format_node(item, amendment) for item in flattened]

    root_label = root.label_id()
    matching = [entry for entry in formatted if root_label in entry]
    root_change = matching[0][root_label]

    body = root.text.strip()
    body = body[len(marker_of(root)):].strip()

    # Nothing but stars, yet marked as edited: assume only the child
    # paragraphs are being modified
    if body == '* * *':
        root_change['field'] = '[children]'

    # Intro text ending in a colon with stars right after it: assume only
    # the intro text is being modified
    if body.endswith(':') and root.source_xml is not None:
        sibling = root.source_xml.getnext()
        if sibling is not None and sibling.tag == 'STARS':
            root_change['field'] = '[text]'

    return formatted
コード例 #6
0
ファイル: key_terms.py プロジェクト: eregs/regulations-parser
    def keyterm_in_node(cls, node, ignore_definitions=True):
        """Look up the keyterm in a node's tagged text.

        The first occurrence of the paragraph marker given by ``marker_of``
        is removed and the remainder stripped before it is passed to
        ``keyterm_in_text``.

        If the node lacks ``tagged_text`` (or it is ``None``), the text is
        treated as empty instead of failing with ``AttributeError``.

        :param node: node whose tagged text is inspected
        :param ignore_definitions: when true, suppress keyterms that
            ``cls.is_definition(node, keyterm)`` flags as definitions
        :return: the keyterm, or ``None`` if there is none to report
        """
        raw = getattr(node, 'tagged_text', None) or ''
        tagged = raw.replace(marker_of(node), '', 1).strip()
        keyterm = keyterm_in_text(tagged)

        is_ignored_definition = bool(
            keyterm and ignore_definitions and
            cls.is_definition(node, keyterm))
        if keyterm and not is_ignored_definition:
            return keyterm
        return None
コード例 #7
0
    def keyterm_in_node(cls, node, ignore_definitions=True):
        """Return the keyterm found in the node's tagged text, if any.

        A missing or empty ``tagged_text`` is treated as an empty string.
        The first occurrence of the node's marker is removed before the
        lookup. When ``ignore_definitions`` is true, keyterms for which
        ``cls.is_definition`` is truthy yield ``None``.
        """
        raw = getattr(node, 'tagged_text', None)
        if not raw:
            raw = ''
        without_marker = raw.replace(marker_of(node), '', 1)
        keyterm = keyterm_in_text(without_marker.strip())

        if not keyterm:
            return None
        skip = ignore_definitions and cls.is_definition(node, keyterm)
        return None if skip else keyterm
コード例 #8
0
 def test_marker_of_range(self):
     """Markers may span a range of paragraphs (seen with "Reserved"
     paragraphs, though other scenarios are likely), so marker_of must
     return the whole range. A hyphen that starts a negative number is
     not part of the marker."""
     cases = [
         ('(b) - (d) Reserved', '(b) - (d)'),
         ('(b)-(d) Some Words', '(b)-(d)'),
         ('b. - d. Can be ignored', 'b. - d.'),
         ('b.-d. Has no negative numbers', 'b.-d.'),
         ('(b) -1.0 is negative', '(b)'),
     ]
     for text, expected in cases:
         node = Node(text=text, label=['b'])
         self.assertEqual(expected, marker_of(node))
コード例 #9
0
 def test_marker_of_range(self):
     """Besides single paragraph markers, marker_of has to cope with
     several markers at once. This was encountered for "Reserved"
     paragraphs, but other scenarios are likely."""
     expected_and_text = (
         ('(b) - (d)', '(b) - (d) Reserved'),
         ('(b)-(d)', '(b)-(d) Some Words'),
         ('b. - d.', 'b. - d. Can be ignored'),
         ('b.-d.', 'b.-d. Has no negative numbers'),
         ('(b)', '(b) -1.0 is negative'),
     )
     for case in expected_and_text:
         expected, text = case
         found = marker_of(Node(text=text, label=['b']))
         self.assertEqual(expected, found)
コード例 #10
0
def overwrite_marker(origin, new_label):
    """Swap the marker in the node's text for one built from new_label.

    The node already carries a label, but it is being given a new one,
    which is necessary during node moves. A marker containing a
    parenthesis is replaced by "(new_label)", any other non-empty marker
    by "new_label.". When there is no marker, a warning is logged and the
    text is left alone. The (possibly modified) node is returned.
    """
    old_marker = marker_of(origin)
    if '(' in old_marker:
        template = '({0})'
    elif old_marker:
        template = '{0}.'
    else:
        logger.warning("Cannot replace marker in %s", origin.text)
        return origin

    origin.text = origin.text.replace(
        old_marker, template.format(new_label), 1)
    return origin
コード例 #11
0
def create_add_amendment(amendment):
    """Convert an amendment's node tree into a flat list of changes.

    The tree is flattened and each node is passed through ``format_node``
    so that every change acts on exactly one node; only the root node
    receives the amendment's ``parent_label``. Changes with a ``'PUT'``
    action are then adjusted when stars are present in the text.
    """
    root = amendment['node']
    flattened = []
    flatten_tree(flattened, root)

    changes = [
        format_node(
            member, amendment,
            amendment.get('parent_label')
            if member.label == root.label else None)
        for member in flattened
    ]

    put_changes = []
    for candidate in changes:
        if any(entry['action'] == 'PUT' for entry in candidate.values()):
            put_changes.append(candidate)

    for change in put_changes:
        # Awkward, but a "changes" dictionary only ever has _one_ key
        label = list(change)[0]
        node = struct.find(root, label)
        body = node.text.strip()
        body = body[len(marker_of(node)):].strip()

        # Nothing but stars: explicitly try to keep this node
        if body == '* * *':
            change[label]['action'] = Verb.KEEP

        # A trailing colon followed by stars (root node only) means just the
        # intro text is being modified
        is_root_intro = (body[-1:] == ':' and node.label == root.label
                         and node.source_xml is not None)
        if is_root_intro:
            sibling = node.source_xml.getnext()
            if sibling is not None and sibling.tag == 'STARS':
                change[label]['field'] = '[text]'

    return changes
コード例 #12
0
def create_add_amendment(amendment):
    """Split an amendment's whole tree into single-node changes.

    ``flatten_tree`` breaks the tree apart and ``format_node`` converts each
    node, with ``parent_label`` supplied for the root node only. Afterwards
    the ``'PUT'`` changes have their action or field tweaked when stars
    are present.
    """
    root = amendment['node']
    flat = []
    flatten_tree(flat, root)

    changes = []
    for member in flat:
        is_root = member.label == root.label
        parent_label = amendment.get('parent_label') if is_root else None
        changes.append(format_node(member, amendment, parent_label))

    def has_put(change):
        return any(entry['action'] == 'PUT' for entry in change.values())

    for change in list(filter(has_put, changes)):
        # There is only ever _one_ key in a "changes" dictionary
        label = next(iter(change))
        node = struct.find(root, label)
        stripped = node.text.strip()
        marker = marker_of(node)
        remainder = stripped[len(marker):].strip()

        # Only stars remain: explicitly try to keep this node
        if remainder == '* * *':
            change[label]['action'] = Verb.KEEP

        # Intro-text-only edits: the root's text ends with a colon and the
        # next XML sibling is a STARS element
        if not (remainder[-1:] == ':' and node.label == root.label and
                node.source_xml is not None):
            continue
        following = node.source_xml.getnext()
        if following is not None and following.tag == 'STARS':
            change[label]['field'] = '[text]'

    return changes