def _append_classcode(classcode, raw_value, start):
    """Return ``classcode`` extended by the code sliced out of ``raw_value``.

    ``raw_value`` is stripped of spaces and slashes; values that are ``None``
    or shorter than four characters after stripping are ignored.  The code is
    the slice ``[start:start + 4]`` and is only appended when it does not
    already occur in ``classcode``.
    """
    if raw_value is None:
        # th.get_node_value may yield None for empty nodes; skip instead of
        # failing on ``None.replace``.
        return classcode
    compact = raw_value.replace(" ", "").replace("/", "")
    if len(compact) <= 3:
        return classcode
    code = compact[start:start + 4]
    if classcode == "":
        return code
    # Substring test against the accumulated string, as in the original code.
    if code not in classcode:
        return classcode + " " + code
    return classcode


def get_classcode(patent_document):
    """Collect distinct class codes as a space-separated string.

    The first child of ``patent_document`` is searched for
    ``classification-ipcr`` elements.  For every ``text`` child, the value is
    stripped of spaces and slashes and its first four characters are kept.
    Scanning stops before an element once four codes have been collected.

    If that yields nothing, the children of all ``B510`` elements are used
    instead, keeping characters 1-4 of the stripped value (the leading
    character is skipped -- presumably an edition prefix; TODO confirm).

    Args:
        patent_document: Indexable node whose first item supports
            ``iter(tag)`` (assumed to be an lxml/ElementTree element).

    Returns:
        The collected codes joined by single spaces, or ``""`` if none
        were found.
    """
    sdobi = patent_document[0]
    classcode = ""

    for sub_node in sdobi.iter('classification-ipcr'):
        # The limit is only checked between elements, so a single element
        # with several ``text`` children can still exceed four codes.
        if len(classcode.split()) == 4:
            break
        for class_ in sub_node:
            if class_.tag != 'text':
                continue
            classcode = _append_classcode(
                classcode, th.get_node_value(class_), 0)

    if classcode == "":
        # Fallback path: every child of a B510 element is considered.
        for sub_node in sdobi.iter('B510'):
            for class_ in sub_node:
                classcode = _append_classcode(
                    classcode, th.get_node_value(class_), 1)

    return classcode
def get_citations(patent_document):
    """Build a space-separated string of citation entries below ``B560``.

    Walks every ``B560`` element under the first child of
    ``patent_document`` and inspects its direct children:

    * ``B561``: each ``text`` child contributes its value with spaces
      removed and everything up to and including the last ``-`` cut off
      (the whole value is kept when there is no ``-``).
    * ``B565EP``: each ``date`` child contributes its value unchanged.

    Args:
        patent_document: Indexable node whose first item supports
            ``iter(tag)`` (assumed to be an lxml/ElementTree element).

    Returns:
        All contributions in document order, separated by single spaces;
        ``""`` when nothing was found.
    """
    root = patent_document[0]
    collected = ""

    for b560 in root.iter('B560'):
        for group in b560:
            # Decide which child tag is relevant for this kind of group.
            if group.tag == 'B561':
                wanted_tag = 'text'
            elif group.tag == 'B565EP':
                wanted_tag = 'date'
            else:
                continue

            for entry in group:
                if entry.tag != wanted_tag:
                    continue
                value = th.get_node_value(entry)
                if wanted_tag == 'text':
                    value = value.replace(" ", "")
                    # rfind returns -1 when '-' is absent, so the slice
                    # then starts at 0 and keeps the full value.
                    value = value[value.rfind('-') + 1:]
                # Separator only once something has been collected already.
                collected += (" " + value) if collected else value

    return collected
def get_country(patent_document):
    """Return the distinct ``ctry`` values found below ``B330`` elements.

    Args:
        patent_document: Indexable node whose first item supports
            ``iter(tag)`` (assumed to be an lxml/ElementTree element).

    Returns:
        The values in document order, separated by single spaces, or ``""``
        when there are none.  A value is skipped when it already occurs as a
        substring of the result built so far (which also skips empty values).
    """
    sdobi = patent_document[0]
    country = ""

    for sub_node in sdobi.iter('B330'):
        for ctry in sub_node:
            if ctry.tag != 'ctry':
                continue
            # Read the node once instead of once per use.
            value = th.get_node_value(ctry)
            # th.get_node_value may yield None for empty nodes; ``None in str``
            # would raise TypeError, so such nodes are ignored.
            if value is None or value in country:
                continue
            country += (" " + value) if country else value

    return country
def get_applicant(patent_document):
    """Return the ``snm`` values found below ``B711`` elements, comma-joined.

    Args:
        patent_document: Indexable node whose first item supports
            ``iter(tag)`` (assumed to be an lxml/ElementTree element).

    Returns:
        The values in document order separated by ``","`` (no space), or
        ``""`` when there are none.
    """
    root = patent_document[0]
    names = ""

    for b711 in root.iter('B711'):
        # Only the ``snm`` children are used; other children are ignored.
        for field in b711:
            if field.tag != 'snm':
                continue
            name = th.get_node_value(field)
            # Comma only once something has been collected already.
            names += ("," + name) if names else name

    return names
def get_alternative_title(node):
    """Return the combined values of the elements following ``EN`` ``B541`` nodes.

    For every ``B541`` descendant of ``node`` whose value equals ``EN``
    (case-insensitive), the value of its next sibling is collected.

    Args:
        node: Element supporting ``iter(tag)``; the matched elements must
            support ``getnext()`` (assumed to be lxml elements).

    Returns:
        ``""`` when no ``B541`` node matched; otherwise the result of
        ``th.get_string_from_list`` applied to the collected non-``None``
        values with ``" "`` as second argument (presumably the separator).
    """
    text = [
        th.get_node_value(lang_node.getnext())
        for lang_node in node.iter('B541')
        if th.get_node_value(lang_node).upper() == 'EN'
    ]
    if not text:
        return ""
    # Drop every None entry; ``list.remove(None)`` would only drop the first.
    text = [value for value in text if value is not None]
    return th.get_string_from_list(text, " ")
def get_alternative_text(patent_document, marker):
    """Return the combined values of all direct children tagged ``marker``.

    Args:
        patent_document: Iterable of elements exposing a ``tag`` attribute.
        marker: Tag name to select.

    Returns:
        ``""`` when no child matched; otherwise the result of
        ``th.get_string_from_list`` applied to the collected non-``None``
        values with ``" "`` as second argument (presumably the separator).
    """
    text = [
        th.get_node_value(child)
        for child in patent_document
        if child.tag == marker
    ]
    if not text:
        return ""
    # Drop every None entry; ``list.remove(None)`` would only drop the first.
    text = [value for value in text if value is not None]
    return th.get_string_from_list(text, " ")
def get_nested_text(node):
    """Return the value of ``node`` after ``remove_tags`` has been applied.

    Comment nodes (``node.tag is etree.Comment``) are left untouched and
    yield ``""``.

    Args:
        node: Element to read; it is passed to ``remove_tags`` first, which
            presumably modifies it in place -- TODO confirm.

    Returns:
        The value reported by ``th.get_node_value``, or ``""`` when that
        value is ``None`` or ``node`` is a comment.
    """
    if node.tag is etree.Comment:
        return ""
    remove_tags(node)
    text = th.get_node_value(node)
    # Identity comparison is the idiomatic None test.
    return "" if text is None else text
def get_text(patent_document, marker):
    """Return the value of the first direct child tagged ``marker``.

    Args:
        patent_document: Iterable of elements exposing a ``tag`` attribute.
        marker: Tag name to look for.

    Returns:
        Whatever ``th.get_node_value`` reports for the first matching child,
        or ``""`` when no child carries that tag.
    """
    first_match = next(
        (child for child in patent_document if child.tag == marker),
        None,
    )
    # Explicit None test: the match itself must not be judged by truthiness.
    if first_match is None:
        return ""
    return th.get_node_value(first_match)
def get_title(node):
    """Return the value of the element following the last ``EN`` ``B541`` node.

    Every ``B541`` descendant of ``node`` is visited; whenever its value
    equals ``EN`` (case-insensitive) the value of its next sibling replaces
    the current result, so the last match wins.

    Args:
        node: Element supporting ``iter(tag)``; the matched elements must
            support ``getnext()`` (assumed to be lxml elements).

    Returns:
        The value reported by ``th.get_node_value`` for the sibling of the
        last matching node, or ``""`` when nothing matched.
    """
    title = ""
    for lang_node in node.iter('B541'):
        language = th.get_node_value(lang_node)
        if language.upper() != 'EN':
            continue
        # No early exit: a later match overrides an earlier one.
        title = th.get_node_value(lang_node.getnext())
    return title