def codeName_extraction(path, name, list_busCode, list_busCode_original):
    """Collect the bus-code names matched in a tab-separated text file.

    Every line of the file ``path + '/' + name`` is split on tabs. The first
    field is kept as the record id and the lower-cased second field is passed
    to ``match_road`` together with ``list_busCode``. For each index that
    ``match_road`` yields, the string ``<id><TAB><original name>`` is printed
    and collected. Afterwards the number of lines read and every collected
    string are printed, and the collected list is handed to ``write_file``
    under the name ``facebook_2015_BusNews_filtering_busStopName``.

    Args:
        path: Directory holding the input file; also passed to ``write_file``.
        name: File name of the input inside ``path``.
        list_busCode: Bus-code names handed to ``match_road`` (presumably
            already normalised/lower-cased -- confirm against the caller).
        list_busCode_original: Original spellings, looked up at
            ``index + 1`` for each index returned by ``match_road``.

    Returns:
        None. Results are printed and passed to ``write_file``.
    """
    list_extract = []
    cnt = 0
    with open(path + '/' + name) as f:
        for line in f:
            cnt += 1
            split_line = line.split('\t')
            if len(split_line) < 2:
                # No text column (e.g. a blank line): nothing to match, and
                # indexing split_line[1] would raise IndexError.
                continue
            # match_road is the road-name matcher, reused here for bus codes.
            list_index = match_road(split_line[1].lower(), list_busCode)
            for index in list_index:
                # list_busCode_original is offset by one relative to
                # list_busCode, hence index + 1.
                entry = split_line[0] + '\t' + list_busCode_original[index + 1]
                # Single-argument print(...) behaves the same on Python 2 and 3.
                print(entry)
                list_extract.append(entry)
    print(cnt)
    for value in list_extract:
        print(value)
    write_file(path, 'facebook_2015_BusNews_filtering_busStopName', list_extract)
def fix_busCodeName(path, name, list_busStopName, list_busCode_original):
    """Re-run bus-code matching for the records listed in ``list_busStopName``.

    The ids (first tab-separated field) of every entry of
    ``list_busStopName`` except the first are collected. The file
    ``path + '/' + name`` is then read line by line; for each line whose id
    is among the collected ones, the lower-cased second field is passed to
    ``match_road`` and one ``<id><TAB><original name>`` string is printed and
    collected per returned index.

    Args:
        path: Directory holding the input file.
        name: File name of the input inside ``path``.
        list_busStopName: Tab-separated strings whose first field is a record
            id. Element 0 is skipped (presumably a header row -- confirm).
        list_busCode_original: Original spellings, looked up at
            ``index + 1`` for each index returned by ``match_road``.

    Returns:
        The list of ``<id><TAB><original name>`` strings that were printed.
        (Earlier versions built this list but discarded it.)
    """
    # Ids are compared case-insensitively: they are lower-cased here and the
    # ids read from the file are lower-cased before the membership test.
    # A set gives O(1) lookups and removes duplicates implicitly.
    wanted_ids = set()
    for entry in list_busStopName[1:]:
        wanted_ids.add(entry.split('\t')[0].lower())

    list_extract = []
    with open(path + '/' + name) as f:
        for text in f:
            split_line = text.strip().split('\t')
            if split_line[0].lower() not in wanted_ids:
                continue
            if len(split_line) < 2:
                # Id without a text column: nothing to match, and indexing
                # split_line[1] would raise IndexError.
                continue
            # NOTE(review): list_busCode is not a parameter of this function;
            # it is resolved from module scope -- confirm it is defined there.
            list_index = match_road(split_line[1].lower(), list_busCode)
            for index in list_index:
                # list_busCode_original is offset by one relative to
                # list_busCode, hence index + 1.
                entry = split_line[0] + '\t' + list_busCode_original[index + 1]
                # Single-argument print(...) behaves the same on Python 2 and 3.
                print(entry)
                list_extract.append(entry)
    return list_extract