Esempio n. 1
0
def _joined_classcodes(sdobi, tag, start, end):
    """Collect class codes from every ``tag`` node below ``sdobi``.

    Each node is passed to ``th.handle_class_node(node, start, end, 'text')``
    (``start``/``end`` are forwarded unchanged). All ``None`` and ``""``
    results are discarded, the remainder is flattened and de-duplicated via
    ``th``, and at most the first four entries are joined with a space.

    Returns ``""`` when no ``tag`` node exists.
    """
    raw_codes = [
        th.handle_class_node(node, start, end, 'text')
        for node in sdobi.iter(tag)
    ]
    if not raw_codes:
        return ""

    # list.remove() would only drop the first None / "" entry, so filter
    # every one of them out explicitly.
    codes = [code for code in raw_codes if code is not None and code != ""]
    codes = th.get_flat_list(codes)
    codes = th.unique_list(codes)
    return th.get_string_from_list(codes[:4], " ")


def get_alternative_classcode(patent_document):
    """Return up to four class codes of a patent as a space-separated string.

    The first child of ``patent_document`` is searched for
    ``classification-ipcr`` nodes first. If that yields an empty string
    (including the case where no such node exists), ``B510`` nodes are used
    as a fallback.

    Args:
        patent_document: indexable XML element; ``patent_document[0]`` must
            provide ``iter(tag)``.

    Returns:
        The joined class codes, or ``""`` when neither source yields any.
    """
    sdobi = patent_document[0]

    classcode = _joined_classcodes(sdobi, 'classification-ipcr', 0, 4)
    if classcode == "":
        # Previously skipped when there were no ipcr nodes at all, because
        # an empty list never compared equal to "".
        classcode = _joined_classcodes(sdobi, 'B510', 1, 5)
    return classcode
def get_alternative_text(patent_document, marker):
    """Join the values of all direct children whose tag equals ``marker``.

    Args:
        patent_document: iterable of nodes exposing a ``tag`` attribute.
        marker: tag name to select.

    Returns:
        The values obtained through ``th.get_node_value`` joined with a
        single space, or ``""`` when no child matches.
    """
    values = [
        th.get_node_value(child)
        for child in patent_document
        if child.tag == marker
    ]
    if not values:
        return ""
    # Drop every None, not just the first one as list.remove() would.
    present = [value for value in values if value is not None]
    return th.get_string_from_list(present, " ")
Esempio n. 3
0
def get_alternative_abstract(node):
    """Join the nested text of every child of an abstract node.

    Args:
        node: iterable of child nodes; each is passed to ``get_nested_text``.

    Returns:
        The non-``None`` texts joined with a single space, or ``""`` when
        ``node`` has no children.
    """
    parts = [get_nested_text(abst) for abst in node]
    if not parts:
        return ""
    # Drop every None, not just the first one as list.remove() would.
    present = [part for part in parts if part is not None]
    return th.get_string_from_list(present, " ")
Esempio n. 4
0
def get_alternative_description(node):
    """Join the nested text of every child of a description node.

    Args:
        node: iterable of child nodes; each is passed to ``get_nested_text``.

    Returns:
        The non-``None`` texts joined with a single space, or ``""`` when
        ``node`` has no children.
    """
    parts = [get_nested_text(desc) for desc in node]
    if not parts:
        return ""
    # Drop every None, not just the first one as list.remove() would.
    present = [part for part in parts if part is not None]
    return th.get_string_from_list(present, " ")
Esempio n. 5
0
def txt_basic_information(file, kind, classcode, applicant, abstract):
    """Write the basic patent fields to ``file``, one per line.

    Four lines are always written: kind, class code, applicant and the
    abstract. When ``abstract`` is ``None`` the fourth line is empty.

    Args:
        file: object with a ``write(str)`` method.
        kind: string written on the first line.
        classcode: string written on the second line.
        applicant: string written on the third line.
        abstract: value handed to ``th.get_string_from_list(abstract, ' ')``,
            or ``None`` for an empty line.
    """
    file.write(kind + "\n")
    file.write(classcode + "\n")
    file.write(applicant + "\n")
    # Identity check: "!= None" misbehaves with objects that overload
    # comparison operators.
    if abstract is not None:
        file.write(th.get_string_from_list(abstract, ' ') + "\n")
    else:
        file.write("\n")
Esempio n. 6
0
def get_alternative_claim(node):
    """Return the claim type and the joined text of a claim node.

    Args:
        node: XML element providing ``iter('claim-text')`` and ``attrib``.

    Returns:
        A tuple ``(claim_type, text)`` where ``claim_type`` is the result of
        ``get_claim_type(node.attrib)`` and ``text`` is the nested text of
        all ``claim-text`` descendants joined with a single space (``""``
        when there are none).
    """
    parts = [get_nested_text(claim) for claim in node.iter('claim-text')]
    claim_type = get_claim_type(node.attrib)
    if not parts:
        return claim_type, ""
    # Drop every None, not just the first one as list.remove() would.
    present = [part for part in parts if part is not None]
    return claim_type, th.get_string_from_list(present, " ")
Esempio n. 7
0
def get_alternative_applicant(patent_document):
    """Return the applicants of a patent as a comma-separated string.

    Every ``B711`` node below the first child of ``patent_document`` is
    passed to ``th.handle_ending_node(node, 'snm')``; the non-``None``
    results are flattened and joined with ``","``.

    Args:
        patent_document: indexable XML element; ``patent_document[0]`` must
            provide ``iter(tag)``.

    Returns:
        The joined applicants, or ``""`` when no ``B711`` node exists.
    """
    sdobi = patent_document[0]
    names = [th.handle_ending_node(node, 'snm') for node in sdobi.iter('B711')]
    if not names:
        return ""
    # Drop every None, not just the first one as list.remove() would.
    names = [name for name in names if name is not None]
    return th.get_string_from_list(th.get_flat_list(names), ",")
Esempio n. 8
0
def get_alternative_title(node):
    """Return the title(s) that follow an 'EN' ``B541`` marker.

    For every ``B541`` descendant whose value (case-insensitively) equals
    ``'EN'``, the value of the element returned by its ``getnext()`` is
    collected.

    Args:
        node: XML element providing ``iter(tag)``; matching descendants must
            provide ``getnext()``.

    Returns:
        The collected non-``None`` values joined with a single space, or
        ``""`` when nothing matches.
    """
    titles = []
    for lang_node in node.iter('B541'):
        lang = th.get_node_value(lang_node)
        # The value may be None (this function filters None results below),
        # so guard before calling .upper().
        if lang is not None and lang.upper() == 'EN':
            titles.append(th.get_node_value(lang_node.getnext()))
    if not titles:
        return ""
    # Drop every None, not just the first one as list.remove() would.
    present = [title for title in titles if title is not None]
    return th.get_string_from_list(present, " ")
Esempio n. 9
0
def get_alternative_citations(patent_document):
    """Return the citations of a patent as a space-separated string.

    Every ``B560`` node below the first child of ``patent_document`` is
    passed to ``th.handle_citation_node(node, 'B565EP', 'B561')``; the
    non-``None`` results are flattened with ``th.get_flat_super_list``,
    de-duplicated and joined with a single space.

    Args:
        patent_document: indexable XML element; ``patent_document[0]`` must
            provide ``iter(tag)``.

    Returns:
        The joined citations, or ``""`` when no ``B560`` node exists.
    """
    sdobi = patent_document[0]
    found = [
        th.handle_citation_node(node, 'B565EP', 'B561')
        for node in sdobi.iter('B560')
    ]
    if not found:
        return ""
    # Drop every None, not just the first one as list.remove() would.
    found = [entry for entry in found if entry is not None]
    found = th.get_flat_super_list(found)
    found = th.unique_list(found)
    return th.get_string_from_list(found, " ")
def patent_classifications(patent):
    """Return the IPC subclasses and the national main class of a patent.

    Each entry of ``patent["classification-ipc"]`` contributes the
    concatenation of its ``"section"``, ``"class"`` and ``"subclass"``
    values. The distinct codes (``np.unique``) are joined with a space.

    Args:
        patent: mapping with a ``"classification-national-main"`` key and,
            optionally, a ``"classification-ipc"`` list of dicts.

    Returns:
        ``(ipc_string, national_main)``. ``ipc_string`` is ``None`` when
        there are no IPC entries or when building the string fails.

    Raises:
        KeyError: if ``"classification-national-main"`` is missing.
    """
    national_main = patent["classification-national-main"]
    try:
        codes = [
            entry["section"] + entry["class"] + entry["subclass"]
            for entry in patent["classification-ipc"]
        ]
        if codes:
            return th.get_string_from_list(np.unique(codes), ' '), national_main
    except Exception:
        # Best-effort: a missing or malformed IPC section simply means no
        # IPC string. Unlike a bare "except:", this no longer swallows
        # KeyboardInterrupt / SystemExit.
        return None, national_main
    return None, national_main
Esempio n. 11
0
def get_alternative_country(patent_document):
    """Return the countries of a patent as a space-separated string.

    Every ``B330`` node below the first child of ``patent_document`` is
    passed to ``th.handle_ending_node(node, 'ctry')``; the non-``None``
    results are flattened, de-duplicated and joined with a single space.

    Args:
        patent_document: indexable XML element; ``patent_document[0]`` must
            provide ``iter(tag)``.

    Returns:
        The joined countries, or ``""`` when no ``B330`` node exists.
    """
    sdobi = patent_document[0]
    countries = [
        th.handle_ending_node(node, 'ctry') for node in sdobi.iter('B330')
    ]
    if not countries:
        return ""
    # Drop every None, not just the first one as list.remove() would.
    countries = [entry for entry in countries if entry is not None]
    countries = th.get_flat_list(countries)
    countries = th.unique_list(countries)
    return th.get_string_from_list(countries, " ")
Esempio n. 12
0
def txt_text_information(file, claim, description):
    """Write the claim and then the description to ``file``, one per line.

    Each value is turned into a string with
    ``th.get_string_from_list(value, ' ')`` and terminated by a newline.
    """
    for section in (claim, description):
        line = th.get_string_from_list(section, ' ')
        file.write(line + "\n")
def organize_processed_patent(patent, dtd_version):
    """Normalise the raw tag/value lists of a parsed patent.

    Args:
        patent: mapping of tag name to a list of raw values.
        dtd_version: when equal to ``2``, every cleaned
            ``classification-ipc`` value must start with an uppercase
            letter, otherwise the patent is rejected.

    Returns:
        A new dict with one entry per tag of ``patent``, or ``None`` when
        the patent lacks ``classification-ipc``, ``claims`` or
        ``description``, fails the DTD-2 check, or any error occurs while
        processing (the error is printed).

        Per-tag post-processing:
          * ``classification-ipc``: list of dicts with ``section``,
            ``class`` and ``subclass`` keys.
          * ``inventors``: values are consumed in groups of four; the first
            two of each group are joined by a space and the groups are
            joined with ``", "``.
          * ``references-cited``: values joined with a space.
          * ``invention-title``, ``classification-national-main``,
            ``patent-country``, ``patent-date``, ``patent-kind``,
            ``patent-doc-number``: first value, or ``''`` if there is none.
          * ``patent-lang``: values concatenated, tokenised via ``th`` and
            concatenated again.
          * any other tag: the cleaned list of values.
    """
    # Without an IPC classification, claims and a description the patent
    # cannot be used for classification, so it is dropped right away.
    required_tags = ("classification-ipc", "claims", "description")
    if any(tag not in patent for tag in required_tags):
        return None

    single_value_tags = (
        "invention-title",
        "classification-national-main",
        "patent-country",
        "patent-date",
        "patent-kind",
        "patent-doc-number",
    )

    new_patent = {}
    try:
        for tag_name, values in patent.items():
            cleaned = []
            for val in values:
                # Skip None entries and strings that begin with a newline.
                if val is None:
                    continue
                if isinstance(val, str) and val.startswith("\n"):
                    continue
                if re.match(r"^classification", tag_name) or tag_name == "references-cited":
                    val = re.sub(r"\s+", "", val)  # strip all whitespace
                cleaned.append(val)
            new_patent[tag_name] = cleaned

            if tag_name == "classification-ipc":
                # DTD version 2: every value has to start with A-Z.
                if dtd_version == 2 and not all(
                        re.match(r"^[A-Z]", value) for value in cleaned):
                    return None
                values_text = th.get_string_from_list(
                    th.tokenize_text(th.get_string_from_list(cleaned, '')), '')
                # Store each IPC code as its section, class and subclass.
                new_patent[tag_name] = [
                    {"section": code[0], "class": code[1:3], "subclass": code[3]}
                    for code in re.findall(
                        r"([A-H][0-9]{2}[A-Z][0-9]{2,4})", values_text)
                ]

            elif tag_name == "inventors":
                # Values come in groups of four; an incomplete trailing
                # group is ignored. Only the first two fields of each group
                # are kept.
                usable = len(cleaned) - (len(cleaned) % 4)
                names = [
                    cleaned[i] + " " + cleaned[i + 1]
                    for i in range(0, usable, 4)
                ]
                new_patent[tag_name] = th.get_string_from_list(names, ', ')

            elif tag_name == "references-cited":
                # The original line had an unbalanced ")" here, which made
                # the whole function a SyntaxError.
                new_patent[tag_name] = th.get_string_from_list(list(cleaned), ' ')

            elif tag_name in single_value_tags:
                # Tags that carry a single value are unwrapped from the list.
                new_patent[tag_name] = cleaned[0] if cleaned else ''

            elif tag_name == "patent-lang":
                new_patent[tag_name] = th.get_string_from_list(
                    th.tokenize_text(th.get_string_from_list(cleaned, '')), '')
        return new_patent
    except Exception as e:
        print("new error occurred - processsing patent. Error:", e)
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        print(exc_type, fname, exc_tb.tb_lineno)
        return None