def main():
    """Predict a zone for every observed device and write the result CSV.

    Steps:
      1. Load the classifier and its zone-name -> integer-label mapping via
         ``get_model()``.
      2. Read every ``*csv.gz`` capture under ``local_path + date_time`` and
         build one signal-strength vector per (mac, 10-second bucket), with
         one slot per entry of ``ROUTERS``.
      3. Predict a zone for each vector, look up the device vendor, and
         write the rows, sorted by (mac, time), to ``RESULT_CSV``.

    A capture file that fails to read or parse is reported on stdout and the
    rest of that file is skipped; processing continues with the next file.

    Raises:
        IndexError: if a capture file name contains no 7-digit placement id.
        KeyError: if the placement id is missing from ``PLC_ROUTER_DICT``.
    """
    # download_file()  # NOTE(review): disabled in the original; the capture
    # files are expected to be on disk already.
    KNN, zone_int_dict = get_model()
    # Labels are unique, so the mapping can be inverted to decode predictions.
    int_zone_dict = {label: zone for zone, label in zone_int_dict.items()}
    file_list = glob.glob(local_path + date_time + '/*csv.gz')

    # mac_sn_dict: {(mac_address, time_rounded_down_to_10_seconds):
    #               [signal strength per router, in ROUTERS order]}
    # Keying on (mac, time bucket) instead of a packet sequence id lets
    # readings of different packets, heard by different routers within the
    # same bucket, be merged into a single vector.
    mac_sn_dict = {}
    for f in file_list:
        placement_name = re.findall(r"\d{7}", f)[0]
        router_bssid = PLC_ROUTER_DICT[placement_name]
        # The router (and therefore its slot) is fixed per file, so resolve
        # it once; files from routers outside ROUTERS contribute nothing.
        if router_bssid not in ROUTERS:
            continue
        index = ROUTERS.index(router_bssid)
        try:
            with gzip.open(f, 'rb') as fp:
                reader = csv.reader(fp)
                next(reader, None)  # skip the header row
                for row in reader:
                    mac = str(row[3]).lower()
                    # Floor division keeps the bucket an int on Python 2 and 3.
                    bucket = int(row[0]) // 10 * 10
                    sn = int(row[6])
                    tmp_key = (mac, bucket)
                    if tmp_key not in mac_sn_dict:
                        # -100 is the placeholder for routers with no reading.
                        mac_sn_dict[tmp_key] = [-100] * ROUTERS_NUM
                    mac_sn_dict[tmp_key][index] = sn
        except Exception as e:
            # Deliberately best-effort: report the broken file and move on.
            print(e)
            print(f)

    vender_checker = get_vendor_checker()
    data_row_cache = []
    for key, signals in mac_sn_dict.items():
        mac, bucket = key
        # Routers also show up as transmitters; they are not devices to locate.
        if str(mac) in ROUTERS:
            continue
        vender = vender_checker.get_manuf(str(mac))
        # transfer to local time
        # NOTE(review): main_process adds this 8-hour offset while this
        # function subtracts it -- confirm which sign is intended.
        local_time = datetime.datetime.utcfromtimestamp(bucket - 8 * 60 * 60)
        x_data = np.array([[int(value) for value in signals]])
        predict_result = int_zone_dict[int(KNN.predict(x_data)[0])]
        data_row_cache.append(
            [mac, local_time] + signals + [predict_result, vender])

    print('sort the result')
    data_row_cache.sort(key=lambda row: (row[0], row[1]))

    # Open the output only once all rows are ready, and close it reliably.
    with open(RESULT_CSV, 'w') as fout:
        writer = csv.writer(fout)
        # Every data row ends with (zone, vendor); the header must match.
        writer.writerow(['mac', 'time'] + ROUTERS + ['zone', 'vendor'])
        writer.writerows(data_row_cache)
    print('finished')
def main_process():
    """Predict a path for each pair of consecutive observations of a device.

    Steps:
      1. Load the classifier and its path-name -> integer-label mapping via
         ``get_model()``.
      2. Read every ``*csv.gz`` capture under ``SRC_FOLDER`` and build one
         signal-strength vector per (mac, 10-second bucket), with one slot
         per entry of ``ROUTERS``.
      3. Sort the vectors by (mac, time) and, for each pair of adjacent rows
         belonging to the same mac, compute the signal difference
         (later row minus earlier row).
      4. Predict a path and its highest class probability for every
         difference, look up the device vendor, and write the earlier row
         of each pair plus those three values to ``RESULT_CSV``.

    A capture file that fails to read or parse is reported on stdout and the
    rest of that file is skipped. If no device has two consecutive
    observations, only the header row is written.

    Raises:
        IndexError: if a capture file name contains no 7-digit placement id.
        KeyError: if the placement id is missing from ``PLC_ROUTER_DICT``.
    """
    KNN, path_int_dict = get_model()
    # Labels are unique, so the mapping can be inverted to decode predictions.
    int_path_dict = {label: path for path, label in path_int_dict.items()}
    file_list = glob.glob(SRC_FOLDER + '/*csv.gz')

    # mac_sn_dict: {(mac_address, time_rounded_down_to_10_seconds):
    #               [signal strength per router, in ROUTERS order]}
    mac_sn_dict = {}
    for f in file_list:
        placement_name = re.findall(r"\d{7}", f)[0]
        router_bssid = PLC_ROUTER_DICT[placement_name]
        # The router (and therefore its slot) is fixed per file, so resolve
        # it once; files from routers outside ROUTERS contribute nothing.
        if router_bssid not in ROUTERS:
            continue
        index = ROUTERS.index(router_bssid)
        try:
            with gzip.open(f, 'rb') as fp:
                reader = csv.reader(fp)
                next(reader, None)  # skip the header row
                for row in reader:
                    mac = str(row[3]).lower()
                    # Floor division keeps the bucket an int on Python 2 and 3.
                    bucket = int(row[0]) // 10 * 10
                    sn = int(row[6])
                    tmp_key = (mac, bucket)
                    if tmp_key not in mac_sn_dict:
                        # -100 is the placeholder for routers with no reading.
                        mac_sn_dict[tmp_key] = [-100] * ROUTERS_NUM
                    mac_sn_dict[tmp_key][index] = sn
        except Exception as e:
            # Deliberately best-effort: report the broken file and move on.
            print(e)
            print(f)

    data_row_cache = []
    for key, signals in mac_sn_dict.items():
        mac, bucket = key
        # Routers also show up as transmitters; they are not devices to track.
        if str(mac) in ROUTERS:
            continue
        # transfer to local time
        local_time = datetime.datetime.utcfromtimestamp(bucket + 8 * 60 * 60)
        data_row_cache.append([mac, local_time] + signals)

    print('sort the result')
    data_row_cache.sort(key=lambda row: (row[0], row[1]))

    # Walk every adjacent pair (including the final one) of the sorted rows.
    # The last row of each mac has no successor and is therefore dropped; its
    # readings survive only inside the preceding row's difference vector.
    diff_sn_cache = []
    new_data_row_cache = []
    for start_row, end_row in zip(data_row_cache, data_row_cache[1:]):
        if start_row[0] != end_row[0]:
            continue
        # NOTE(review): the slice assumes exactly three routers
        # (ROUTERS_NUM == 3) -- confirm before changing the router set.
        start_sn_list = start_row[-3:]
        end_sn_list = end_row[-3:]
        # A to B
        diff_sn_cache.append(
            [end - start for start, end in zip(start_sn_list, end_sn_list)])
        new_data_row_cache.append(start_row)

    # predict -- skipped when there is nothing to classify, because the
    # classifier cannot be called on an empty feature matrix.
    if diff_sn_cache:
        features = numpy.array(diff_sn_cache)
        predict_result = KNN.predict(features)
        proba_result = KNN.predict_proba(features)

        # add path, probability, vendor to every kept row
        vender_checker = get_vendor_checker()
        for row, label, probabilities in zip(
                new_data_row_cache, predict_result, proba_result):
            vender = vender_checker.get_manuf(str(row[0]))
            row.extend([str(int_path_dict[label]), max(probabilities), vender])

    # save in csv; the context manager guarantees the file is flushed/closed
    with open(RESULT_CSV, 'w') as fout:
        writer = csv.writer(fout)
        writer.writerow(
            ['mac', 'time'] + ROUTERS + ['path', 'probability', 'vendor'])
        writer.writerows(new_data_row_cache)
    print('finished')