Esempio n. 1
0
def main():
    """Load the sample office action, clean it, and segment it into sentences.

    The text is read with ``file_reader.getStringFromTxt``, passed through
    ``clean_OA``, and the cleaned text is handed to ``sentence_seg``.
    Nothing is returned.
    """
    raw_text = file_reader.getStringFromTxt(
        '2018-09-21 15694060 nonfinal rejection.txt')

    # clean_OA yields a pair; only the first item is needed here. The second
    # is presumably a substitution count (original name: numsubs) -- confirm
    # against clean_OA.
    cleaned_text, _ = clean_OA(raw_text)

    sentence_seg(cleaned_text)
Esempio n. 2
0
def _gather_rejection_text(lines, start, max_lines=5):
    """Return the rejection statement that begins at ``lines[start]``.

    Lines are concatenated, each preceded by a single space, until a line
    whose stripped text ends with a period has been added or ``max_lines``
    lines have been consumed, whichever comes first.

    The slice is clamped to the end of ``lines``, so a match located fewer
    than ``max_lines`` lines from the end of the document cannot raise
    ``IndexError``.
    """
    pieces = []
    for line in lines[start:start + max_lines]:
        pieces.append(' ' + line)
        if line.strip().endswith('.'):
            break
    return ''.join(pieces)


def main_old():
    """Interactively load an office action and print the rejections found.

    The user is asked whether a PDF should be converted first. Answering
    "y" prompts for a PDF filename and converts it with
    ``convert_pdf_to_txt``; answering "n" falls back to the fixed text file
    ``output/test.txt``. Any other answer repeats the question.

    The text is then split into lines. Every line matching
    ``.+Claim.+rejected\\sunder.+`` starts a new ``Rejection`` whose text is
    collected from that line onward (see ``_gather_rejection_text``). When
    the collected text contains ``Claim... is/are``, the captured claim
    phrase is stored on the rejection. All rejections are appended to an
    ``OfficeAction`` and printed at the end.
    """
    # Keep asking until the answer is recognised.
    while True:
        answer = input("Convert PDF (Y/N)?").lower()
        if answer == "y":
            output_path = convert_pdf_to_txt(input("Provide PDF filename:"))
            break
        if answer == "n":
            output_path = "output/test.txt"
            break
        print("did not understand")

    data = file_reader.getStringFromTxt(output_path)
    clean_data = data.replace('\n\n', '\n')
    # NOTE(review): the split uses the raw text, not clean_data, so blank
    # lines remain in data_split_space. Kept as in the original -- confirm
    # whether the collapsed text was meant to be split instead.
    data_split_space = data.split('\n')
    print(clean_data)
    print(data_split_space)
    print(len(data_split_space))

    oa1 = OfficeAction()

    # Compiled once; both patterns are loop invariants.
    rejection_line = re.compile(r'.+Claim.+rejected\sunder.+')
    claims_phrase = re.compile(r'(Claim.+)(?:\s)(?:is|are)')

    for index, line in enumerate(data_split_space):
        if not rejection_line.match(line):
            continue

        rej = Rejection()
        rej.rejectionText = _gather_rejection_text(data_split_space, index)

        matchObj = claims_phrase.search(rej.rejectionText)
        print(type(matchObj))
        print(rej.claims_refs)
        if matchObj:
            claim_text = matchObj.group(1)
            print(matchObj.groups())
            print(claim_text)
            rej.claims_refs[claim_text] = None
            rej.claims = claim_text

        oa1.rejections.append(rej)

    for r in oa1.rejections:
        print(r.rejectionText)
        print(r.claims_refs)
        print(r.claims)
Esempio n. 3
0
def tokenize_test():
    """Read the sample office action and run NLTK word tokenization on it.

    The resulting token list is neither returned nor otherwise used in the
    code shown here.
    """
    source_name = '2018-09-21 15694060 nonfinal rejection.txt'
    text = file_reader.getStringFromTxt(source_name)
    tokens = nltk.tokenize.word_tokenize(text)
Esempio n. 4
0
	20.   The method of claim 16, wherein each of the plurality of memory devices is configured to determine a resistance and a target output high level voltage of an output driver by performing the impedance calibration operation, and
	wherein the output driver is configured to output data externally to each of the plurality of memory devices.  
"""

#print(test)

#matchObj = re.match(r'[0-9]\.',test)

#print(matchObj.group())

#list = re.findall(r'[0-9]{1,2}\..*\n\n',test,re.M|re.S)

# Maps a claim number (kept as a string) to a list that starts out empty
# for every claim found below.
claims = {}
filename = '8836S-1189 claims.txt'
claimsString = file_reader.getStringFromTxt(filename)

# Collect every span that starts with one or two digits followed by a period
# and runs to the end of its line (re.M makes `$` match at each line end).
# NOTE(review): the pattern is not anchored with `^`, so a "<digits>." that
# appears mid-line also starts a match, and numbers with three or more
# digits are not captured in full.
# NOTE(review): `list` shadows the builtin of the same name for the rest of
# the module.
list = re.findall(r'[0-9]{1,2}\..*$', claimsString, re.M)

print(list)
print(len(list))

for i in list:
    # Every item starts with its number by construction of the findall
    # pattern above, so this match cannot be None.
    claimNo = re.match(r'[0-9]{1,2}', i).group(0)
    print(claimNo)
    #print(type(claimNo))
    #matchObj = re.search(r'claim [0-9]{1,2}',i,re.I)

    # A repeated claim number overwrites the earlier entry with a new
    # empty list.
    claims[claimNo] = []

    # Captures the word "claim" and the number after it; how the pattern is
    # used is not visible in this part of the file.
    depClaimRegex = r'(claim) (\d+)'