# Creating regex for state names: one alternation per tier file
# (regex_state_1, regex_state_2) plus a companion alternation of common
# misspellings (regex_state_edit_*).
# NOTE(review): dynamic module-level names via vars() are kept because
# later code appears to reference regex_state_<i> directly — confirm
# before refactoring to a dict.
for i in range(1, 3):
    var_edit = "regex_state_edit_" + str(i)
    var = "regex_state_" + str(i)
    vars()[var] = ""
    vars()[var_edit] = ""
    with open('states_' + str(i) + '.txt', 'r') as states:
        for state in states:
            # Drop any parenthesized annotation, e.g. "Name (abbr)".
            state = re.sub(r"\(.*\)", " ", state)
            state = re.sub("\n", "", state)
            # Fix: strip leftover whitespace so the \b anchors sit directly
            # against the name (original emitted "\bName  \b", which can
            # never match); skip blank lines entirely.
            state = state.strip()
            if not state:
                continue
            vars()[var] += r"\b" + state + r"\b|"
            # Misspellings are generated only for names longer than 4 chars.
            if len(state) <= 4:
                continue
            for state_edit in spellMistakes(state):
                if state_edit == "Nagar" or len(state_edit) <= 4:
                    continue
                vars()[var_edit] += r"\b" + state_edit + r"\b|"
    # Drop the trailing "|" left by the building loop (no-op when empty).
    vars()[var] = vars()[var][:-1]
    vars()[var_edit] = vars()[var_edit][:-1]

# Location of test file.
test_file_loc = "testAddress.json"
with open(test_file_loc, 'r') as file:
    all_add = json.load(file)

# Declaring ml variables.
# Maximum no of predictions made for a given address.
no_of_predictions = 5
# Creating regex for city names: one alternation per tier file
# (regex_city_1 .. regex_city_6) plus a companion alternation of common
# misspellings (regex_city_edit_*), mirroring the state-regex build above.
for i in range(1, 7):
    var_edit = "regex_city_edit_" + str(i)
    var = "regex_city_" + str(i)
    vars()[var] = ""
    vars()[var_edit] = ""
    # NOTE(review): backslash path separator is Windows-only — confirm the
    # deployment platform (os.path.join would be portable).
    with open('tier_cities\\tier' + str(i) + 'cities.txt', 'r') as cities:
        for city in cities:
            # Drop any parenthesized annotation, e.g. "Name (abbr)".
            city = re.sub(r"\(.*\)", " ", city)
            city = re.sub("\n", "", city)
            # Fix: strip leftover whitespace so the \b anchors sit directly
            # against the name (original emitted "\bName  \b", which can
            # never match); skip blank lines and the generic suffix "Nagar".
            city = city.strip()
            if not city or city == "Nagar":
                continue
            vars()[var] += r"\b" + city + r"\b|"
            # Misspellings are generated only for names longer than 4 chars.
            if len(city) <= 4:
                continue
            for city_edit in spellMistakes(city):
                if city_edit == "Nagar" or len(city_edit) <= 4:
                    continue
                vars()[var_edit] += r"\b" + city_edit + r"\b|"
    # Drop the trailing "|" left by the building loop (no-op when empty).
    vars()[var] = vars()[var][:-1]
    vars()[var_edit] = vars()[var_edit][:-1]

index = -1
# Looping through all addresses and predicting.
for dictionary in all_add:
    actual_city = dictionary["city"]
    add = dictionary["address"]
    # Removing utf-16 encoding (literal "\uXXXX" residue) from the address.
    # NOTE(review): "." matches ANY char, not just hex digits — confirm the
    # data contains only real \uXXXX escapes before tightening the pattern.
    add = re.sub(r"\\u....", " ", add)
    # (loop body continues past this point in the file)
# Creating regex for state names (training-set variant of the block above):
# one alternation per tier file (regex_state_1, regex_state_2) plus a
# companion alternation of common misspellings (regex_state_edit_*).
# NOTE(review): dynamic module-level names via vars() are kept because
# later code appears to reference regex_state_<i> directly — confirm
# before refactoring to a dict.
for i in range(1, 3):
    var_edit = "regex_state_edit_" + str(i)
    var = "regex_state_" + str(i)
    vars()[var] = ""
    vars()[var_edit] = ""
    with open('states_' + str(i) + '.txt', 'r') as states:
        for state in states:
            # Drop any parenthesized annotation, e.g. "Name (abbr)".
            state = re.sub(r"\(.*\)", " ", state)
            state = re.sub("\n", "", state)
            # Fix: strip leftover whitespace so the \b anchors sit directly
            # against the name (original emitted "\bName  \b", which can
            # never match); skip blank lines entirely.
            state = state.strip()
            if not state:
                continue
            vars()[var] += r"\b" + state + r"\b|"
            # Misspellings are generated only for names longer than 4 chars.
            if len(state) <= 4:
                continue
            for state_edit in spellMistakes(state):
                if state_edit == "Nagar" or len(state_edit) <= 4:
                    continue
                vars()[var_edit] += r"\b" + state_edit + r"\b|"
    # Drop the trailing "|" left by the building loop (no-op when empty).
    vars()[var] = vars()[var][:-1]
    vars()[var_edit] = vars()[var_edit][:-1]

# Location of test file.
test_file_loc = "trainAddress.json"
with open(test_file_loc, 'r') as file:
    all_add = json.load(file)

# Declaring ml variables.
# Maximum no of predictions made for a given address.
# Creating regex for city names (training-set variant): one alternation per
# tier file (regex_city_1 .. regex_city_6) plus a companion alternation of
# common misspellings (regex_city_edit_*).
for i in range(1, 7):
    var_edit = "regex_city_edit_" + str(i)
    var = "regex_city_" + str(i)
    vars()[var] = ""
    vars()[var_edit] = ""
    # NOTE(review): backslash path separator is Windows-only — confirm the
    # deployment platform (os.path.join would be portable).
    with open('tier_cities\\tier' + str(i) + 'cities.txt', 'r') as cities:
        for city in cities:
            # Drop any parenthesized annotation, e.g. "Name (abbr)".
            city = re.sub(r"\(.*\)", " ", city)
            city = re.sub("\n", "", city)
            # Fix: strip leftover whitespace so the \b anchors sit directly
            # against the name (original emitted "\bName  \b", which can
            # never match); skip blank lines and the generic suffix "Nagar".
            city = city.strip()
            if not city or city == "Nagar":
                continue
            vars()[var] += r"\b" + city + r"\b|"
            # Misspellings are generated only for names longer than 4 chars.
            if len(city) <= 4:
                continue
            for city_edit in spellMistakes(city):
                if city_edit == "Nagar" or len(city_edit) <= 4:
                    continue
                vars()[var_edit] += r"\b" + city_edit + r"\b|"
    # Drop the trailing "|" left by the building loop (no-op when empty).
    vars()[var] = vars()[var][:-1]
    vars()[var_edit] = vars()[var_edit][:-1]

index = -1
# Looping through all addresses and predicting.
for dictionary in all_add:
    actual_city = dictionary["city"]
    add = dictionary["address"]
    # Removing utf-16 encoding (literal "\uXXXX" residue) from the address.
    # NOTE(review): "." matches ANY char, not just hex digits — confirm the
    # data contains only real \uXXXX escapes before tightening the pattern.
    add = re.sub(r"\\u....", " ", add)
    # (loop body continues past this point in the file)