def matching(text, command):
    """Collect the whitespace-separated tokens of dictionary aliases found in ``text``.

    ``command`` selects which dictionary file is loaded through ``load_file``:
    ``'road'`` or ``'busstop'``. Any other value leaves the dictionary empty,
    so an empty list is returned.

    Each loaded entry is lower-cased and split on ``';'`` into aliases. For
    every entry, only the first alias accepted by ``pattern_match(alias, text)``
    contributes; that alias is split on whitespace and its tokens are appended
    to the result in encounter order (duplicates are kept).
    """
    # Both dictionaries live in the same directory.
    data_dir = 'D:/Project/Transportation_SMU-NEC_collaboration/Data'

    if command == 'road':
        entries = load_file(data_dir, 'road_abbrevation_all.csv')
    elif command == 'busstop':
        # Original note: delete the header of the bus stop file before use.
        entries = load_file(data_dir, 'bus_stop_crf.csv')
    else:
        entries = []

    tokens = []
    for entry in entries:
        for alias in entry.lower().split(';'):
            if pattern_match(alias, text) is True:
                tokens.extend(alias.split())
                break  # at most one alias per entry
    return tokens
def extract_road_busstop_expression(list_line, list_dict):
    """Tag input lines by dictionary matching and print evaluation metrics.

    Every element of ``list_line`` is split on tabs and must have at least
    four fields: field 1 is the reference label, field 2 the candidate token
    (``svc``) and field 3 the text that is searched (field 0 is ignored).

    Every element of ``list_dict`` is a ``';'``-separated list of aliases.
    For each dictionary entry, the first alias accepted by
    ``pattern_match(alias.lower(), text)`` is split on whitespace and its
    lower-cased tokens are collected without duplicates. The line is
    predicted ``'TRUE'`` when ``svc`` is one of the collected tokens and
    ``'FALSE'`` otherwise.

    Side effects: prints one progress message per processed line, passes the
    progress messages to ``write_file('d:/', 'busstop', ...)`` and prints the
    accuracy, classification report and confusion matrix obtained from
    ``metrics`` (presumably ``sklearn.metrics`` -- confirm against the file's
    imports). Nothing is returned.
    """
    y_label = []
    y_reg = []
    list_write = []
    cnt = 1
    for line in list_line:
        split_line = line.split('\t')
        label = split_line[1].strip()
        y_label.append(label)
        svc = split_line[2].strip()
        text = split_line[3].strip().lower()  # text that may mention a road or bus stop

        list_road_match = []
        # Iterate the dictionary that was passed in; the previous code used
        # len(list_dict) but indexed an undefined name ``list_road``.
        for road in list_dict:
            for token in road.split(';'):
                lowered = token.lower()
                if pattern_match(lowered, text) is True:
                    for value in lowered.split():
                        # Compare the lower-cased form that is actually
                        # stored, so duplicates are detected reliably.
                        if value not in list_road_match:
                            list_road_match.append(value)
                    break  # at most one alias per dictionary entry

        # NOTE(review): svc is compared as-is against lower-cased tokens;
        # confirm whether svc can contain upper-case characters.
        flag = 'TRUE' if svc in list_road_match else 'FALSE'
        y_reg.append(flag)

        message = '-- finished this line -- %i' % cnt + '\t' + flag
        print(message)
        list_write.append(message)
        cnt += 1
        # NOTE(review): this break stops after the first input line, so the
        # metrics below cover a single sample. It looks like a debugging
        # leftover but is kept to preserve existing behaviour -- confirm.
        break

    write_file('d:/', 'busstop', list_write)
    print(metrics.accuracy_score(y_label, y_reg))
    print(metrics.classification_report(y_label, y_reg))
    print(metrics.confusion_matrix(y_label, y_reg))
def match_road(string, list_road):
    """Return the positions of the entries in ``list_road`` that match ``string``.

    Each entry is split on ``';'`` into aliases. An entry counts as a match
    as soon as ``pattern_match(alias, string)`` returns ``True`` for one of
    its aliases; later aliases of that entry are not tested. Positions are
    zero-based and reported in ascending order, at most once per entry.
    """
    hits = []
    for position, entry in enumerate(list_road):
        aliases = entry.split(';')
        # any() short-circuits on the first accepted alias, mirroring a
        # break out of an explicit loop.
        if any(pattern_match(alias, string) is True for alias in aliases):
            hits.append(position)
    return hits