def best_threshold(model):
    """Sweep a fixed list of thresholds and print the one with the best F1.

    Loads the test graphs and the stored results for ``model``, flattens
    every candidate edge of every city/graph into one list, sorts it by
    descending score and evaluates precision/recall/F1 at each threshold.

    Args:
        model: Model name; it is lower-cased before being passed to
            ``load_model_result``.

    Returns:
        None. The best F1 and its threshold are printed.
    """

    def metrics(Y):
        """Return ``(best_f1, best_threshold)`` for candidates ``Y``.

        ``Y`` must be sorted by descending ``'score'``. Scores are compared
        against ``math.log(threshold)``, so they are presumably
        log-probabilities (TODO confirm against the model output).
        """
        positive = sum(y['target'] for y in Y)
        # Must stay in descending order: the scan position and the
        # right/wrong counters are carried over from one threshold to the
        # next, so each candidate is examined exactly once.
        thresholds = [0.5, 0.45, 0.4, 0.35, 0.3, 0.25]
        index = 0
        right, wrong = 0, 0
        # NOTE: these are the loader's own edge lists; accepted predictions
        # are appended to them in place.
        existed_edges = {ids: test[ids]['source_edges'] for ids in test.ids}
        id2node = {
            node['osmid']: node
            for ids in test.ids
            for node in test[ids]['nodes']
        }
        best_f1, best_th = 0, 0
        for th in thresholds:
            cutoff = math.log(th)
            for i in range(index, len(Y)):
                sample = Y[i]
                if sample['score'] < cutoff:
                    # First candidate below this threshold: resume here
                    # for the next (lower) threshold.
                    index = i
                    break
                edge = {'start': sample['start'], 'end': sample['end']}
                graph_edges = existed_edges[sample['id']]
                if is_valid(edge, graph_edges, id2node):
                    graph_edges.append(dict(edge))
                    if sample['target'] == 1:
                        right += 1
                    else:
                        wrong += 1
            else:
                # Every remaining candidate was consumed. Without this the
                # next threshold would rescan the same candidates and could
                # count them a second time.
                index = len(Y)
            p = 1.0 * right / (right + wrong + 1e-9)
            # Guard against a sample set with no positive targets.
            r = 1.0 * right / positive if positive else 0.0
            f1 = 2 * p * r / (p + r + 1e-9)
            if best_f1 < f1:
                best_f1 = f1
                best_th = th
            print(p, r, best_f1, best_th)
        return best_f1, best_th

    test = DataLoader(
        'E:/python-workspace/CityRoadPrediction/data_20200610/test/')
    test.load_all_datas()
    result = load_model_result(model.lower(), data_dir)
    y = []
    for city in result:
        for index, v in result[city].items():
            for sample in v:
                y.append({
                    'id': index,
                    'start': sample['start'],
                    'end': sample['end'],
                    'score': sample['score'],
                    'target': int(sample['target'])
                })
    # Drop the raw results before sorting to keep peak memory down.
    del result
    y = sorted(y, key=lambda e: e['score'], reverse=True)
    f1, th = metrics(y)
    print(f1, th)
def predict(model):
    """Label every candidate edge, print metrics and dump results per city.

    Loads the test graphs and the stored results for ``model``. For each
    city and each graph in it, candidates are sorted by descending score
    and labelled with ``'predict'`` 0/1 using a city-specific threshold and
    ``is_valid``. Per-graph, per-city and overall precision/recall/F1 are
    printed, and each city's labelled candidates are written to
    ``data_dir + 'relational-gcn/final/Relational-GCN-<city>-result.json'``.

    Args:
        model: Model name; it is lower-cased before being passed to
            ``load_model_result``.

    Returns:
        None.
    """

    def metrics(Y, ids):
        """Label the candidates of graph ``ids`` and count hits.

        Returns ``(right, wrong, positive, new_Y)`` where ``new_Y`` holds
        copies of the candidates with ``'predict'`` added and ``'id'``
        removed.
        """
        positive = sum(y['target'] for y in Y)
        # ``city`` is the loop variable of the enclosing function, read at
        # call time.
        if city in ['Hongkong', 'Guangzhou', 'Singapore']:
            thresholds = 0.2
        elif city in ['Beijing', 'Shanghai', 'Shenzhen']:
            thresholds = 0.45
        else:
            thresholds = 0.6
        # Scores are compared against log(threshold), so they are
        # presumably log-probabilities (TODO confirm).
        cutoff = math.log(thresholds)
        right, wrong = 0, 0
        # NOTE: this is the loader's own edge list; accepted predictions
        # are appended to it in place.
        existed_edges = test[ids]['source_edges']
        id2node = {node['osmid']: node for node in test[ids]['nodes']}
        new_Y = []
        for sample in Y:
            y = copy.deepcopy(sample)
            y['predict'] = 0
            if sample['score'] > cutoff:
                edge = {'start': sample['start'], 'end': sample['end']}
                if is_valid(edge, existed_edges, id2node):
                    existed_edges.append(dict(edge))
                    y['predict'] = 1
                    if sample['target'] == 1:
                        right += 1
                    else:
                        wrong += 1
            y.pop('id')
            new_Y.append(y)
        p = 1.0 * right / (right + wrong + 1e-9)
        # A single graph may contain no positive candidates at all.
        r = 1.0 * right / positive if positive else 0.0
        f1 = 2 * p * r / (p + r + 1e-9)
        print(ids, p, r, f1)
        return right, wrong, positive, new_Y

    test = DataLoader(
        'E:/python-workspace/CityRoadPrediction/data_2020715/test/')
    test.load_all_datas()
    result = load_model_result(model.lower(), data_dir)
    right, wrong, total = 0, 0, 0
    for city in result:
        new_result = {}
        r_, w_, t_ = 0, 0, 0
        for index, v in result[city].items():
            y = [{
                'id': index,
                'start': sample['start'],
                'end': sample['end'],
                'score': sample['score'],
                'target': int(sample['target'])
            } for sample in v]
            y = sorted(y, key=lambda e: e['score'], reverse=True)
            r, w, t, y = metrics(y, index)
            r_ += r
            w_ += w
            t_ += t
            new_result[index] = y
        p = 1.0 * r_ / (r_ + w_ + 1e-9)
        r = 1.0 * r_ / t_ if t_ else 0.0
        f1 = 2 * p * r / (p + r + 1e-9)
        print(city, r_, w_, t_, p, r, f1)
        right += r_
        wrong += w_
        total += t_
        out_path = (data_dir + 'relational-gcn/final/Relational-GCN-' +
                    city + '-result.json')
        # Context manager so the file is flushed and closed for each city.
        with open(out_path, 'w') as fp:
            json.dump(new_result, fp, indent=2)
    p = 1.0 * right / (right + wrong + 1e-9)
    r = 1.0 * right / total if total else 0.0
    f1 = 2 * p * r / (p + r + 1e-9)
    print(p, r, f1)