Beispiel #1
0
def depatisconnect_alternatives(number):
    """
    Reverse "fix_patent" for DE documents.

    Returns a list of candidate spellings for ``number``. For anything that
    is not a German ("DE") document the list holds just the original number.
    For DE documents with a purely numeric number part, the original number
    comes first, followed by variants with a substituted kind code. For DE
    documents with a non-numeric number part, the list holds only the
    re-joined number.
    """

    parts = split_patent_number(number)

    # Nothing to derive for non-German documents
    if parts['country'] != 'DE':
        return [number]

    # Non-numeric number part: hand back the re-joined number only
    serial_text = parts['number']
    if not serial_text.isdigit():
        return [join_patent(parts)]

    serial = int(serial_text)

    # The original number always leads the list
    candidates = [number]

    if serial < 1000000:
        # e.g. DE000000121107A, DE000000801283B
        if parts['kind'] == 'C':
            for alternative_kind in ('B', 'A'):
                parts['kind'] = alternative_kind
                candidates.append(join_patent(parts))

    elif serial >= 1400000:
        # e.g. DE000002363448A
        if parts['kind'] == 'A1':
            parts['kind'] = 'A'
            candidates.append(join_patent(parts))

    # Numbers from 1000000 up to (but excluding) 1400000,
    # e.g. DE000001020931A, get no alternatives.
    return candidates
Beispiel #2
0
def normalize_patent(number, as_dict=False, as_string=False, fix_kindcode=False, for_ops=True, provider=None):
    """
    Normalize a patent number given either as a string or as a patent dict.

    A non-dict input is first split with ``split_patent_number``. The patent
    dict is then passed through ``patch_patent`` for the selected provider
    and, on request, through ``fix_patent_kindcode_ops``.

    :param number: patent number, or an already split patent dict
    :param as_dict: force returning the normalized patent dict
    :param as_string: force returning the joined number; ignored when
        ``as_dict`` is set
    :param fix_kindcode: additionally call ``fix_patent_kindcode_ops`` on the
        normalized patent
    :param for_ops: when exactly ``True`` and no ``provider`` is given, the
        provider defaults to ``'ops'``
    :param provider: provider name handed to ``patch_patent``
    :return: by default the same flavour as the input (dict in, dict out;
        otherwise the result of ``join_patent``), unless ``as_dict`` or
        ``as_string`` request something else
    """

    if provider is None and for_ops is True:
        provider = 'ops'

    # 1. handle patent dicts or convert (split) from string
    # Use the builtin ``dict`` rather than ``types.DictionaryType``: the
    # latter is just an alias of ``dict`` on Python 2 and does not exist on
    # Python 3.
    given_as_dict = isinstance(number, dict)
    patent = number if given_as_dict else split_patent_number(number)

    # 2.a. normalize patent dict
    patent_normalized = patch_patent(patent, provider=provider)

    # 2.b. apply fixes
    # NOTE(review): the return value is discarded, so this presumably
    # adjusts the dict in place - confirm against fix_patent_kindcode_ops.
    if fix_kindcode:
        fix_patent_kindcode_ops(patent_normalized)

    # 3. result handling

    # 3.a) default mechanism: return the same flavour we were given
    if given_as_dict:
        result = patent_normalized
    else:
        result = join_patent(patent_normalized)

    # 3.b) extended mechanism: an explicit request overrides the default
    if as_dict:
        result = patent_normalized
    elif as_string:
        result = join_patent(patent_normalized)

    return result
Beispiel #3
0
def test_denormalization():
    """
    Print each sample number next to its denormalized form.

    Every non-empty payload line is run through ``split_patent_number``,
    ``denormalize_patent`` and ``join_patent``. Lines starting with ``---``
    are echoed unchanged. Nothing is asserted; the tab-separated output is
    meant for manual inspection.
    """

    payload = """
WO2002051230
WO2002051231
WO2006113621A3
WO1998016331A3
WO2000001014A1
WO2001002000A3
WO1999012345
WO1999123456
WO2001012345
WO2001098623A1
WO2001098623A1
WO2001098623A1
WO2001098623A1
WO2003107732
WO2003107732
WO2004000001
WO1999013800
WO1999023997
WO1990004917
WO2000027301
WO2000000748
WO2003043359
WO2003107520
WO2007054055
---
WO1990004917
"""

    # Single-argument ``print(...)`` calls produce the same output as the
    # print statement on Python 2 and are also valid syntax on Python 3.
    print("-" * 30)
    print("original\tdenormalized")
    print("-" * 30)
    for number in payload.split("\n"):
        # split("\n") never yields a newline itself, so skipping empty
        # strings is enough to drop the blank first and last entries.
        if not number:
            continue

        # Separator lines are echoed as they are
        if number.startswith('---'):
            print(number)
            continue

        number_denormalized = join_patent(
            denormalize_patent(split_patent_number(number)))
        print("%s\t%s" % (number, number_denormalized))