def extract_uitspraak(ruling_tree):
    """Extract the ruling ("uitspraak") text from a parsed ruling document.

    Three XPath expressions are tried in order of precedence, and the first
    one that yields non-blank text wins:

    1. all text of the section containing a text node with 'DE UITSPRAAK',
       together with all text of its following sibling sections;
    2. all text of the last section whose ``role`` attribute is
       'beslissing';
    3. all text of sections whose title text contains 'eslissing'.

    Previously every expression was evaluated but only the result of the
    first one was ever used, so the fallbacks (2) and (3) had no effect.

    :param ruling_tree: parsed document to evaluate the expressions against
        (presumably an lxml element or element tree -- confirm with caller).
    :return: ``filter_out_wijzers(clean(text))`` for the selected text. When
        no expression matches, the empty string is passed through the same
        two helpers, exactly as before, so the no-match result is unchanged.
    """
    from lxml.etree import XPath

    # Ordered by precedence: earlier expressions are preferred.
    xpath_strs = [
        "/open-rechtspraak/rvr:uitspraak/rvr:section//rvr:*/text()[contains(., 'DE UITSPRAAK')]/"
        "ancestor::rvr:section/descendant-or-self::text() | "
        "/open-rechtspraak/rvr:uitspraak/rvr:section//rvr:*/text()[contains(., 'DE UITSPRAAK')]/"
        "ancestor::rvr:section/following-sibling::rvr:section/descendant-or-self::text()",
        '/open-rechtspraak/rvr:uitspraak/rvr:section[@role="beslissing"][last()]/descendant-or-self::text()',
        "/open-rechtspraak/rvr:uitspraak/rvr:section/rvr:title/text()[contains(., 'eslissing')][last()]//"
        "ancestor::rvr:section/descendant-or-self::text()"
    ]

    text = ''
    for xpath_str in xpath_strs:
        find_text = XPath(xpath_str, namespaces=NAMESPACE_PREFIX_MAP)
        candidate = '\n'.join(find_text(ruling_tree))
        # Whitespace-only output counts as "no match" so that a fallback
        # expression still gets its chance.
        if candidate.strip():
            text = candidate
            break  # later expressions need not be evaluated at all

    return filter_out_wijzers(clean(text))
def extract_standpunt_adv(ruling_tree):
    """Extract the defence's position ("standpunt van de verdediging").

    Two XPath expressions are tried in order of precedence, and the first
    one that yields non-blank text wins:

    1. the text of the ``para`` siblings following a ``para`` (inside a
       ``parablock``) in which some element's text contains
       'standpunt van de verdediging';
    2. the text of elements below the ``parablock`` descendants of the
       parent of a heading-like element, inside a ``paragroup``, whose text
       contains one of the 'standpunt van (de) verdediging' spellings.

    The previous implementation compared ``len(items)`` -- which always
    equals the number of expressions (2) -- against 1, so every call ended
    in ``assert False`` (or silently returned None under ``python -O``).

    :param ruling_tree: parsed document to evaluate the expressions against
        (presumably an lxml element or element tree -- confirm with caller).
    :return: ``clean(text)`` for the first expression that produced
        non-blank text, or None when none of them matched.
    """
    from lxml.etree import XPath

    # Ordered by precedence: earlier expressions are preferred.
    xpath_strs = [
        "/open-rechtspraak/rvr:uitspraak/rvr:section//rvr:parablock/rvr:para/"
        "descendant-or-self::*[contains(text(), 'standpunt van de verdediging')]/"
        "ancestor::rvr:para/following-sibling::rvr:para/text()",
        "/open-rechtspraak/rvr:uitspraak/rvr:section//rvr:paragroup/"
        "descendant-or-self::*[contains(text(), 'standpunt van de Verdediging') "  # TODO: unneeded
        "or contains(text(), 'standpunt van de verdediging')"
        "or contains(text(), 'standpunt van verdediging')]/"  # TODO: unneeded
        "parent::*/descendant::rvr:parablock/descendant::*/text()"
    ]
    # Disabled third candidate, kept for reference:
    # "/open-rechtspraak/rvr:uitspraak/rvr:section//rvr:para/"
    # "rvr:emphasis[text()='Het standpunt van de verdediging']/"
    # "ancestor::rvr:para/following-sibling::rvr:*/text()"

    for xpath_str in xpath_strs:
        find_text = XPath(xpath_str, namespaces=NAMESPACE_PREFIX_MAP)
        candidate = '\n'.join(find_text(ruling_tree))
        # Whitespace-only output counts as "no match" so that the fallback
        # expression still gets its chance.
        if candidate.strip():
            return clean(candidate)

    return None
def extract_uitspraak(ruling_tree):
    """Extract the ruling ("uitspraak") text from a parsed ruling document.

    Three XPath expressions are tried in order of precedence, and the first
    one that yields non-blank text wins:

    1. all text of the section containing a text node with 'DE UITSPRAAK',
       together with all text of its following sibling sections;
    2. all text of the last section whose ``role`` attribute is
       'beslissing';
    3. all text of sections whose title text contains 'eslissing'.

    Previously every expression was evaluated but only the result of the
    first one was ever used, so the fallbacks (2) and (3) had no effect.

    :param ruling_tree: parsed document to evaluate the expressions against
        (presumably an lxml element or element tree -- confirm with caller).
    :return: ``filter_out_wijzers(clean(text))`` for the selected text. When
        no expression matches, the empty string is passed through the same
        two helpers, exactly as before, so the no-match result is unchanged.
    """
    from lxml.etree import XPath

    # Ordered by precedence: earlier expressions are preferred.
    xpath_strs = [
        "/open-rechtspraak/rvr:uitspraak/rvr:section//rvr:*/text()[contains(., 'DE UITSPRAAK')]/"
        "ancestor::rvr:section/descendant-or-self::text() | "
        "/open-rechtspraak/rvr:uitspraak/rvr:section//rvr:*/text()[contains(., 'DE UITSPRAAK')]/"
        "ancestor::rvr:section/following-sibling::rvr:section/descendant-or-self::text()",
        '/open-rechtspraak/rvr:uitspraak/rvr:section[@role="beslissing"][last()]/descendant-or-self::text()',
        "/open-rechtspraak/rvr:uitspraak/rvr:section/rvr:title/text()[contains(., 'eslissing')][last()]//"
        "ancestor::rvr:section/descendant-or-self::text()"
    ]

    text = ''
    for xpath_str in xpath_strs:
        find_text = XPath(xpath_str, namespaces=NAMESPACE_PREFIX_MAP)
        candidate = '\n'.join(find_text(ruling_tree))
        # Whitespace-only output counts as "no match" so that a fallback
        # expression still gets its chance.
        if candidate.strip():
            text = candidate
            break  # later expressions need not be evaluated at all

    return filter_out_wijzers(clean(text))
def extract_standpunt_adv(ruling_tree):
    """Extract the defence's position ("standpunt van de verdediging").

    Two XPath expressions are tried in order of precedence, and the first
    one that yields non-blank text wins:

    1. the text of the ``para`` siblings following a ``para`` (inside a
       ``parablock``) in which some element's text contains
       'standpunt van de verdediging';
    2. the text of elements below the ``parablock`` descendants of the
       parent of a heading-like element, inside a ``paragroup``, whose text
       contains one of the 'standpunt van (de) verdediging' spellings.

    The previous implementation compared ``len(items)`` -- which always
    equals the number of expressions (2) -- against 1, so every call ended
    in ``assert False`` (or silently returned None under ``python -O``).

    :param ruling_tree: parsed document to evaluate the expressions against
        (presumably an lxml element or element tree -- confirm with caller).
    :return: ``clean(text)`` for the first expression that produced
        non-blank text, or None when none of them matched.
    """
    from lxml.etree import XPath

    # Ordered by precedence: earlier expressions are preferred.
    xpath_strs = [
        "/open-rechtspraak/rvr:uitspraak/rvr:section//rvr:parablock/rvr:para/"
        "descendant-or-self::*[contains(text(), 'standpunt van de verdediging')]/"
        "ancestor::rvr:para/following-sibling::rvr:para/text()",
        "/open-rechtspraak/rvr:uitspraak/rvr:section//rvr:paragroup/"
        "descendant-or-self::*[contains(text(), 'standpunt van de Verdediging') "  # TODO: unneeded
        "or contains(text(), 'standpunt van de verdediging')"
        "or contains(text(), 'standpunt van verdediging')]/"  # TODO: unneeded
        "parent::*/descendant::rvr:parablock/descendant::*/text()"
    ]
    # Disabled third candidate, kept for reference:
    # "/open-rechtspraak/rvr:uitspraak/rvr:section//rvr:para/"
    # "rvr:emphasis[text()='Het standpunt van de verdediging']/"
    # "ancestor::rvr:para/following-sibling::rvr:*/text()"

    for xpath_str in xpath_strs:
        find_text = XPath(xpath_str, namespaces=NAMESPACE_PREFIX_MAP)
        candidate = '\n'.join(find_text(ruling_tree))
        # Whitespace-only output counts as "no match" so that the fallback
        # expression still gets its chance.
        if candidate.strip():
            return clean(candidate)

    return None
def extract_tenlastelegging(ruling_tree):
    """Extract the indictment ("tenlastelegging") text from a ruling.

    A single XPath expression selects every section whose title (or an
    element below the title) has a text node containing 'tenlastelegging',
    then collects the direct text of all elements below that section except
    those whose local name is 'nr'.

    :param ruling_tree: parsed document to evaluate the expression against
        (presumably an lxml element or element tree -- confirm with caller).
    :return: ``clean()`` applied to the matched text nodes joined with
        newlines; when nothing matches, ``clean('')`` is returned.
    """
    from lxml.etree import XPath

    find_section_text = XPath(
        "/open-rechtspraak/rvr:uitspraak/rvr:section/rvr:title/"
        "descendant-or-self::*/text()[contains(., 'tenlastelegging')]/ancestor::rvr:section"
        "//*[not(local-name()='nr')]/text()",
        namespaces=NAMESPACE_PREFIX_MAP)

    # Only one expression exists, so its joined output is always the result.
    joined_text = '\n'.join(find_section_text(ruling_tree))
    return clean(joined_text)
def extract_standpunt_ovj(ruling_tree):
    """Extract the public prosecutor's position ("standpunt van de officier
    van justitie") from a parsed ruling document.

    Three XPath expressions are tried in order of precedence, and the first
    one that yields non-blank text wins:

    1. the text of the ``para`` siblings following a ``para`` that either
       (inside a ``parablock``) contains one of the 'standpunt van ...'
       phrases, or has an ``emphasis`` with a text node containing
       ' eis van de officier(en) van justitie';
    2. the text of elements below the ``parablock`` descendants of the
       parent of an element, inside a ``paragroup``, whose text contains
       one of the 'standpunt van ...' phrases;
    3. the text of the siblings following a ``para`` whose ``emphasis`` text
       is exactly 'De vordering van de officier(en) van justitie'.

    The previous implementation compared ``len(items)`` -- which always
    equals the number of expressions (3) -- against 2, so every call ended
    in ``assert False`` (or silently returned None under ``python -O``).

    :param ruling_tree: parsed document to evaluate the expressions against
        (presumably an lxml element or element tree -- confirm with caller).
    :return: ``clean(text)`` for the first expression that produced
        non-blank text, or None when none of them matched.
    """
    from lxml.etree import XPath

    # Ordered by precedence: earlier expressions are preferred.
    xpath_strs = (
        "/open-rechtspraak/rvr:uitspraak/rvr:section//rvr:parablock/rvr:para/"
        "descendant-or-self::*[contains(text(), 'standpunt van de officier van justitie') "
        "or contains(text(), 'standpunt van de officieren van justitie') "
        "or contains(text(), 'standpunt van het Openbaar Ministerie') "
        "or contains(text(), 'standpunt van het openbaar ministerie')]/"
        "ancestor::rvr:para/following-sibling::rvr:para/text() | "
        "/open-rechtspraak/rvr:uitspraak/rvr:section//rvr:para/rvr:emphasis/"
        "descendant-or-self::*/text()[contains(., ' eis van de officier van justitie') or "
        "contains(., ' eis van de officieren van justitie')]/"
        "ancestor::rvr:para/following-sibling::rvr:para/text()",
        "/open-rechtspraak/rvr:uitspraak/rvr:section//rvr:paragroup/"
        "descendant-or-self::*[contains(text(), 'standpunt van de officier van justitie') "
        "or contains(text(), 'standpunt van de officieren van justitie') "
        "or contains(text(), 'standpunt van het Openbaar Ministerie') "
        "or contains(text(), 'standpunt van het openbaar ministerie')]/"
        "parent::*/descendant::rvr:parablock/descendant::*/text()",
        # TODO: remove; sentencing ("strafoplegging") is not evidence ("bewijs")
        "/open-rechtspraak/rvr:uitspraak/rvr:section//rvr:para/"
        "rvr:emphasis[text()='De vordering van de officier van justitie' or "
        "text()='De vordering van de officieren van justitie']/"
        "ancestor::rvr:para/following-sibling::rvr:*/text()",
        # Disabled fourth candidate, kept for reference:
        # "/open-rechtspraak/rvr:uitspraak/rvr:section/rvr:parablock/rvr:para/rvr:emphasis"
    )

    for xpath_str in xpath_strs:
        find_text = XPath(xpath_str, namespaces=NAMESPACE_PREFIX_MAP)
        candidate = '\n'.join(find_text(ruling_tree))
        # Whitespace-only output counts as "no match" so that a fallback
        # expression still gets its chance.
        if candidate.strip():
            return clean(candidate)

    return None
def extract_standpunt_ovj(ruling_tree):
    """Extract the public prosecutor's position ("standpunt van de officier
    van justitie") from a parsed ruling document.

    Three XPath expressions are tried in order of precedence, and the first
    one that yields non-blank text wins:

    1. the text of the ``para`` siblings following a ``para`` that either
       (inside a ``parablock``) contains one of the 'standpunt van ...'
       phrases, or has an ``emphasis`` with a text node containing
       ' eis van de officier(en) van justitie';
    2. the text of elements below the ``parablock`` descendants of the
       parent of an element, inside a ``paragroup``, whose text contains
       one of the 'standpunt van ...' phrases;
    3. the text of the siblings following a ``para`` whose ``emphasis`` text
       is exactly 'De vordering van de officier(en) van justitie'.

    The previous implementation compared ``len(items)`` -- which always
    equals the number of expressions (3) -- against 2, so every call ended
    in ``assert False`` (or silently returned None under ``python -O``).

    :param ruling_tree: parsed document to evaluate the expressions against
        (presumably an lxml element or element tree -- confirm with caller).
    :return: ``clean(text)`` for the first expression that produced
        non-blank text, or None when none of them matched.
    """
    from lxml.etree import XPath

    # Ordered by precedence: earlier expressions are preferred.
    xpath_strs = (
        "/open-rechtspraak/rvr:uitspraak/rvr:section//rvr:parablock/rvr:para/"
        "descendant-or-self::*[contains(text(), 'standpunt van de officier van justitie') "
        "or contains(text(), 'standpunt van de officieren van justitie') "
        "or contains(text(), 'standpunt van het Openbaar Ministerie') "
        "or contains(text(), 'standpunt van het openbaar ministerie')]/"
        "ancestor::rvr:para/following-sibling::rvr:para/text() | "
        "/open-rechtspraak/rvr:uitspraak/rvr:section//rvr:para/rvr:emphasis/"
        "descendant-or-self::*/text()[contains(., ' eis van de officier van justitie') or "
        "contains(., ' eis van de officieren van justitie')]/"
        "ancestor::rvr:para/following-sibling::rvr:para/text()",
        "/open-rechtspraak/rvr:uitspraak/rvr:section//rvr:paragroup/"
        "descendant-or-self::*[contains(text(), 'standpunt van de officier van justitie') "
        "or contains(text(), 'standpunt van de officieren van justitie') "
        "or contains(text(), 'standpunt van het Openbaar Ministerie') "
        "or contains(text(), 'standpunt van het openbaar ministerie')]/"
        "parent::*/descendant::rvr:parablock/descendant::*/text()",
        # TODO: remove; sentencing ("strafoplegging") is not evidence ("bewijs")
        "/open-rechtspraak/rvr:uitspraak/rvr:section//rvr:para/"
        "rvr:emphasis[text()='De vordering van de officier van justitie' or "
        "text()='De vordering van de officieren van justitie']/"
        "ancestor::rvr:para/following-sibling::rvr:*/text()",
        # Disabled fourth candidate, kept for reference:
        # "/open-rechtspraak/rvr:uitspraak/rvr:section/rvr:parablock/rvr:para/rvr:emphasis"
    )

    for xpath_str in xpath_strs:
        find_text = XPath(xpath_str, namespaces=NAMESPACE_PREFIX_MAP)
        candidate = '\n'.join(find_text(ruling_tree))
        # Whitespace-only output counts as "no match" so that a fallback
        # expression still gets its chance.
        if candidate.strip():
            return clean(candidate)

    return None