Esempio n. 1
0
#creating regex for state names
# For every states_<i>.txt file two module-level pattern strings are built:
#   regex_state_<i>      -- alternation of the state names as listed
#   regex_state_edit_<i> -- alternation of the variants spellMistakes() yields
# Every alternative is wrapped in \b...\b so it only matches whole words.
for i in range(1, 3):
    var_edit = "regex_state_edit_" + str(i)
    var = "regex_state_" + str(i)
    exact_parts = []
    edit_parts = []
    with open('states_' + str(i) + '.txt', 'r') as states:
        for state in states:
            # drop a parenthesised remark, then the line terminator
            state = re.sub(r"\(.*\)", " ", state)
            state = re.sub("\n", "", state)
            if not state.strip():
                # A blank line would contribute r"\b\b" (or r"\b \b"), which
                # matches the empty string / a bare space at word boundaries
                # and would make the whole alternation match almost anywhere.
                continue
            exact_parts.append(r"\b" + state + r"\b")
            # short names produce too many accidental variant matches
            if len(str(state)) <= 4:
                continue
            for state_edit in spellMistakes(state):
                if state_edit == "Nagar" or len(state_edit) <= 4:
                    continue
                edit_parts.append(r"\b" + state_edit + r"\b")
    # join once instead of appending "...|" repeatedly and trimming the last "|"
    vars()[var] = "|".join(exact_parts)
    vars()[var_edit] = "|".join(edit_parts)

# path of the JSON file that holds the address records to process
test_file_loc = "testAddress.json"

# read the whole file and parse it; the parsed content is kept in all_add
with open(test_file_loc, 'r') as file:
    all_add = json.loads(file.read())

#declaring ml variables
#maximum no of predictions made for a given address
no_of_predictions = 5

# For every tier_cities/tier<i>cities.txt file two module-level pattern
# strings are built:
#   regex_city_<i>      -- alternation of the city names as listed
#   regex_city_edit_<i> -- alternation of the variants spellMistakes() yields
# Every alternative is wrapped in \b...\b so it only matches whole words.
for i in range(1, 7):
    var_edit = "regex_city_edit_" + str(i)
    var = "regex_city_" + str(i)
    exact_parts = []
    edit_parts = []
    # "/" is accepted by open() on Windows as well as POSIX; the previous
    # hard-coded "\\" separator only resolved to a sub-directory on Windows.
    with open('tier_cities/tier' + str(i) + 'cities.txt', 'r') as cities:
        for city in cities:
            # drop a parenthesised remark, then the line terminator
            city = re.sub(r"\(.*\)", " ", city)
            city = re.sub("\n", "", city)
            if city == "Nagar":
                continue
            if not city.strip():
                # A blank line would contribute r"\b\b" (or r"\b \b"), which
                # matches the empty string / a bare space at word boundaries
                # and would make the whole alternation match almost anywhere.
                continue
            exact_parts.append(r"\b" + city + r"\b")
            # short names produce too many accidental variant matches
            if len(str(city)) <= 4:
                continue
            for city_edit in spellMistakes(city):
                if city_edit == "Nagar" or len(city_edit) <= 4:
                    continue
                edit_parts.append(r"\b" + city_edit + r"\b")
    # join once instead of appending "...|" repeatedly and trimming the last "|"
    vars()[var] = "|".join(exact_parts)
    vars()[var_edit] = "|".join(edit_parts)

# index starts at -1; its later use is not visible in this part of the script
index = -1
#looping through all addresses and predicting
# each element of all_add is indexed with the keys "city" and "address"
for dictionary in all_add:

    # value stored under "city" for this record
    actual_city = dictionary["city"]
    # raw address text that the following steps clean up
    add = dictionary["address"]

    # replace every literal "\u" followed by any four characters
    # (an escaped \uXXXX sequence left in the text) with a single space
    add = re.sub(r"\\u....", " ", add)
#creating regex for state names
# For every states_<i>.txt file two module-level pattern strings are built:
#   regex_state_<i>      -- alternation of the state names as listed
#   regex_state_edit_<i> -- alternation of the variants spellMistakes() yields
# Every alternative is wrapped in \b...\b so it only matches whole words.
for i in range(1, 3):
    var_edit = "regex_state_edit_" + str(i)
    var = "regex_state_" + str(i)
    exact_parts = []
    edit_parts = []
    with open('states_' + str(i) + '.txt', 'r') as states:
        for state in states:
            # drop a parenthesised remark, then the line terminator
            state = re.sub(r"\(.*\)", " ", state)
            state = re.sub("\n", "", state)
            if not state.strip():
                # A blank line would contribute r"\b\b" (or r"\b \b"), which
                # matches the empty string / a bare space at word boundaries
                # and would make the whole alternation match almost anywhere.
                continue
            exact_parts.append(r"\b" + state + r"\b")
            # short names produce too many accidental variant matches
            if len(str(state)) <= 4:
                continue
            for state_edit in spellMistakes(state):
                if state_edit == "Nagar" or len(state_edit) <= 4:
                    continue
                edit_parts.append(r"\b" + state_edit + r"\b")
    # join once instead of appending "...|" repeatedly and trimming the last "|"
    vars()[var] = "|".join(exact_parts)
    vars()[var_edit] = "|".join(edit_parts)


# path of the JSON file that holds the address records to process
# (the variable is called test_file_loc, but here it points at trainAddress.json)
test_file_loc = "trainAddress.json"

# read the whole file and parse it; the parsed content is kept in all_add
with open(test_file_loc, 'r') as file:
    all_add = json.loads(file.read())

#creating regex for city names
# For every tier_cities/tier<i>cities.txt file two module-level pattern
# strings are built:
#   regex_city_<i>      -- alternation of the city names as listed
#   regex_city_edit_<i> -- alternation of the variants spellMistakes() yields
# Every alternative is wrapped in \b...\b so it only matches whole words.
for i in range(1, 7):
    var_edit = "regex_city_edit_" + str(i)
    var = "regex_city_" + str(i)
    exact_parts = []
    edit_parts = []
    # "/" is accepted by open() on Windows as well as POSIX; the previous
    # hard-coded "\\" separator only resolved to a sub-directory on Windows.
    with open('tier_cities/tier' + str(i) + 'cities.txt', 'r') as cities:
        for city in cities:
            # drop a parenthesised remark, then the line terminator
            city = re.sub(r"\(.*\)", " ", city)
            city = re.sub("\n", "", city)
            if city == "Nagar":
                continue
            if not city.strip():
                # A blank line would contribute r"\b\b" (or r"\b \b"), which
                # matches the empty string / a bare space at word boundaries
                # and would make the whole alternation match almost anywhere.
                continue
            exact_parts.append(r"\b" + city + r"\b")
            # short names produce too many accidental variant matches
            if len(str(city)) <= 4:
                continue
            for city_edit in spellMistakes(city):
                if city_edit == "Nagar" or len(city_edit) <= 4:
                    continue
                edit_parts.append(r"\b" + city_edit + r"\b")
    # join once instead of appending "...|" repeatedly and trimming the last "|"
    vars()[var] = "|".join(exact_parts)
    vars()[var_edit] = "|".join(edit_parts)

# index starts at -1; its later use is not visible in this part of the script
index = -1
#looping through all addresses and predicting
# each element of all_add is indexed with the keys "city" and "address"
for dictionary in all_add :

    # value stored under "city" for this record
    actual_city = dictionary["city"]
    # raw address text that the following steps clean up
    add = dictionary["address"]

    # replace every literal "\u" followed by any four characters
    # (an escaped \uXXXX sequence left in the text) with a single space
    add = re.sub(r"\\u...."," ",add)