def _get_title(self, root: ET.Element) -> str:
    """Look up the English invention title beneath *root*.

    The lookup is delegated to ``find_text_or_default`` using the path
    ``bibliographic-data/technical-data/invention-title`` restricted to
    ``lang='EN'``; an empty string is supplied as the fallback value.

    Args:
        root: Element the title path is resolved against.

    Returns:
        Whatever ``find_text_or_default`` yields for that path
        (presumably the element text, or ``''`` when nothing matches).
    """
    english_title_path = (
        "bibliographic-data/technical-data/invention-title[@lang='EN']"
    )
    return find_text_or_default(
        root=root,
        xpath=english_title_path,
        default='',
    )
def _get_tags(self, root: ET.Element) -> List[str]:
    """Return the leading token of the ``PRI-IPC`` text as a tag list.

    The ``PRI-IPC`` value is fetched through ``base.find_text_or_default``
    with ``''`` as the fallback, and only its first whitespace-separated
    token is kept.

    Args:
        root: Element the ``PRI-IPC`` lookup is resolved against.

    Returns:
        A list holding the first token, or an empty list when the value
        is missing or blank.
    """

    def convert_tags(orig: str) -> List[str]:
        """Keep only the first whitespace-separated token of *orig*.

        >>> convert_tags('C01C 03/16')
        ['C01C']
        >>> convert_tags('')
        []
        """
        # split() without a separator skips leading/repeated whitespace and
        # yields [] for blank input, so no empty-string tag is ever produced
        # (split(' ') would give [''] in those cases).
        return orig.split()[:1]

    clfs: str = base.find_text_or_default(root, 'PRI-IPC', '')
    return convert_tags(clfs)
def _get_docid(self, root: ET.Element) -> str:
    """Derive a compact document id from the ``DOCNO`` value under *root*.

    The ``DOCNO`` value is fetched through ``base.find_text_or_default``
    with ``''`` as the fallback. Its last two hyphen-separated fields are
    then concatenated, e.g. ``'PATENT-US-GRT-1993-05176894'`` becomes
    ``'199305176894'``. A value without hyphens is returned unchanged.

    Args:
        root: Element the ``DOCNO`` lookup is resolved against.

    Returns:
        The concatenation of the final two hyphen-separated fields.
    """
    raw_docno: str = base.find_text_or_default(root, 'DOCNO', '')
    # Only the trailing two fields matter, so split from the right at most
    # twice and glue those fields together.
    trailing_fields = raw_docno.rsplit('-', 2)[-2:]
    return ''.join(trailing_fields)
def create_name_mapping(self) -> Dict[str, str]:
    """Build a mapping from topic number to document id over all query files.

    For every path yielded by ``self.iter_query_files()`` the file content
    is passed through ``self.converter.escape`` and parsed as XML. The
    topic number is read from ``NUM`` and the document id is obtained by
    handing the ``DOC`` child to ``self.converter._get_docid``.

    Returns:
        Dictionary keyed by topic number with the document id as value.
        If two files share a topic number, the later one wins.

    Raises:
        AssertionError: If a query file yields an empty topic number or
            an empty document id. The message names the offending file.
    """
    mapping: Dict[str, str] = {}
    for fpath in self.iter_query_files():
        # Read first so the handle is closed before parsing starts.
        with open(fpath, 'r') as fin:
            raw_body: str = fin.read()

        xml_body: str = self.converter.escape(raw_body)
        root: ET.Element = ET.fromstring(xml_body)

        topic_num: str = find_text_or_default(root, 'NUM', '')
        doc_root: ET.Element = get_or_raise_exception(root.find('DOC'))
        docid: str = self.converter._get_docid(doc_root)

        if not topic_num or not docid:
            # Same exception type as before, now with enough context to
            # locate the broken query file.
            raise AssertionError(
                f'query file {fpath!r} has an empty topic number '
                f'({topic_num!r}) or document id ({docid!r})'
            )
        mapping[topic_num] = docid
    return mapping
def _get_text(self, root: ET.Element) -> str:
    """Fetch the ``SPEC`` value beneath *root*.

    Args:
        root: Element the ``SPEC`` lookup is resolved against.

    Returns:
        The result of ``base.find_text_or_default`` for ``SPEC``, with
        ``''`` supplied as the fallback value.
    """
    return base.find_text_or_default(root, 'SPEC', '')
def _get_title(self, root: ET.Element) -> str:
    """Fetch the ``TITLE`` value beneath *root*.

    Args:
        root: Element the ``TITLE`` lookup is resolved against.

    Returns:
        The result of ``base.find_text_or_default`` for ``TITLE``, with
        ``''`` supplied as the fallback value.
    """
    return base.find_text_or_default(root, 'TITLE', '')