Ejemplo n.º 1
0
def get_classcode(patent_document):
    """Return the distinct class codes of a patent as one space-separated string.

    The first child of ``patent_document`` is searched for
    ``classification-ipcr`` descendants. For each of their children tagged
    ``text`` the value is stripped of spaces and slashes and, if more than
    three characters remain, its first four characters are taken as a code.

    If that yields nothing, every child of every ``B510`` descendant is
    processed the same way, except that characters 1 to 4 (the first one
    is skipped) form the code.

    Returns an empty string when no code is found.
    """

    def _compact(element):
        # NOTE(review): presumably th.get_node_value returns the node's text
        # as a str; a None result would raise here -- confirm with the helper.
        return th.get_node_value(element).replace(" ", "").replace("/", "")

    def _merge(codes, code):
        # Uniqueness is a substring test against the accumulated string.
        if codes == "":
            return code
        if code not in codes:
            return codes + " " + code
        return codes

    bibliography = patent_document[0]
    codes = ""

    for ipcr in bibliography.iter('classification-ipcr'):
        # The cap of four codes is only checked between classification
        # nodes, so a single node may still push the total past four.
        if len(codes.split()) == 4:
            break
        for child in ipcr:
            if child.tag != 'text':
                continue
            raw = _compact(child)
            if len(raw) > 3:
                codes = _merge(codes, raw[:4])

    if codes != "":
        return codes

    # Fallback: older-style B510 classification entries (no tag filter).
    for b510 in bibliography.iter('B510'):
        for child in b510:
            raw = _compact(child)
            if len(raw) > 3:
                codes = _merge(codes, raw[1:5])
    return codes
Ejemplo n.º 2
0
def get_citations(patent_document):  # B500, B550, B560
    citations = ""
    sdobi = patent_document[0]
    for node in sdobi.iter('B560'):
        # snm, iid (number of opponent), irf (), adr
        for sub_node in node:
            if sub_node.tag == 'B561':
                for cit in sub_node:
                    if cit.tag == 'text':
                        if len(citations) != 0:
                            temp_citation = th.get_node_value(cit)
                            temp_citation = temp_citation.replace(" ", "")
                            temp_citation = temp_citation[temp_citation.
                                                          rfind('-') + 1:]
                            citations += " " + temp_citation
                        else:
                            temp_citation = th.get_node_value(cit)
                            temp_citation = temp_citation.replace(" ", "")
                            temp_citation = temp_citation[temp_citation.
                                                          rfind('-') + 1:]
                            citations += temp_citation
            elif sub_node.tag == 'B565EP':
                for cit in sub_node:
                    if cit.tag == 'date':
                        if len(citations) != 0:
                            citations += " " + th.get_node_value(cit)
                        else:
                            citations += th.get_node_value(cit)
    return citations
Ejemplo n.º 3
0
def get_country(patent_document):
    """Return the distinct country values of a patent, space-separated.

    Looks at every ``B330`` descendant of the first child of
    ``patent_document`` and collects the value of each child tagged
    ``ctry``. A value is skipped when it already occurs (as a substring)
    in the string collected so far.

    Returns an empty string when nothing is found.
    """
    joined = ""
    bibliography = patent_document[0]
    for b330 in bibliography.iter('B330'):
        for child in b330:
            if child.tag != 'ctry':
                continue
            # assumes th.get_node_value is a pure lookup, so reading the
            # value once is equivalent to re-reading it -- TODO confirm
            code = th.get_node_value(child)
            if code in joined:
                continue
            joined = joined + " " + code if joined else code
    return joined
Ejemplo n.º 4
0
def get_applicant(patent_document):
    """Return the applicant names of a patent as one comma-separated string.

    For every ``B711`` descendant of the first child of
    ``patent_document`` the value of each child tagged ``snm`` is
    collected. Other children (the original comment lists iid, irf and
    adr) are ignored.

    Returns an empty string when nothing is found.
    """
    bibliography = patent_document[0]
    names = (
        th.get_node_value(child)
        for b711 in bibliography.iter('B711')
        for child in b711
        if child.tag == 'snm'
    )
    joined = ""
    for name in names:
        # No separator is written while the result is still empty.
        joined = joined + "," + name if joined else joined + name
    return joined
Ejemplo n.º 5
0
def get_alternative_title(node):
    """Return the English title(s) found below ``node`` joined by spaces.

    Every ``B541`` descendant whose value upper-cases to ``'EN'`` is
    treated as a language marker; the value of the element that follows
    it (``getnext()``) is taken as a title.

    Missing values (``None``) are dropped before joining. The previous
    implementation used ``list.remove(None)``, which only removes the
    first occurrence, so further ``None`` entries could reach the join.

    Returns an empty string when no English marker is present.
    """
    titles = [
        th.get_node_value(language.getnext())
        for language in node.iter('B541')
        if th.get_node_value(language).upper() == 'EN'
    ]
    if not titles:
        return ""
    # Drop every missing value, not just the first one.
    titles = [title for title in titles if title is not None]
    return th.get_string_from_list(titles, " ")
def get_alternative_text(patent_document, marker):
    """Return the values of all direct children tagged ``marker``, joined by spaces.

    Args:
        patent_document: iterable of nodes exposing a ``tag`` attribute.
        marker: tag name to select.

    Missing values (``None``) are dropped before joining. The previous
    implementation used ``list.remove(None)``, which only removes the
    first occurrence, so further ``None`` entries could reach the join.

    Returns an empty string when no child carries the requested tag.
    """
    values = [
        th.get_node_value(child)
        for child in patent_document
        if child.tag == marker
    ]
    if not values:
        return ""
    # Drop every missing value, not just the first one.
    values = [value for value in values if value is not None]
    return th.get_string_from_list(values, " ")
Ejemplo n.º 7
0
def get_nested_text(node):
    """Return the value of ``node`` after ``remove_tags`` has processed it.

    Comment nodes (``node.tag is etree.Comment``) are skipped and yield an
    empty string, as does a node whose value is ``None``.

    NOTE(review): ``remove_tags`` is called on ``node`` before the value is
    read; it is defined elsewhere, so whether it mutates ``node`` in place
    should be confirmed there.
    """
    if node.tag is etree.Comment:
        return ""
    remove_tags(node)
    text = th.get_node_value(node)
    # Identity comparison is the correct test for the None singleton.
    return "" if text is None else text
def get_text(patent_document, marker):
    """Return the value of the first direct child tagged ``marker``.

    Args:
        patent_document: iterable of nodes exposing a ``tag`` attribute.
        marker: tag name to look for.

    Returns an empty string when no child carries the requested tag;
    otherwise whatever ``th.get_node_value`` yields for the first match.
    """
    first_match = next(
        (child for child in patent_document if child.tag == marker),
        None,
    )
    if first_match is None:
        return ""
    return th.get_node_value(first_match)
Ejemplo n.º 9
0
def get_title(node):
    """Return the English title found below ``node``.

    Every ``B541`` descendant whose value upper-cases to ``'EN'`` is
    treated as a language marker, and the value of the element following
    it (``getnext()``) is the title. When several markers match, the last
    one wins.

    Returns an empty string when no English marker is present.
    """
    titles = [
        th.get_node_value(language.getnext())
        for language in node.iter('B541')
        if th.get_node_value(language).upper() == 'EN'
    ]
    return titles[-1] if titles else ""