/
file_loader.py
124 lines (109 loc) · 3.86 KB
/
file_loader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import time
import statics
from common import ROOT_PATH
from common import NeighborInfo
from data_structure_util import CQueue
__author__ = 'alex'
def load_map():
    """
    Load the labelled edge list and build the in/out adjacency structures.

    Reads ``ROOT_PATH + "data.txt"``, where every non-blank line holds three
    tab-separated integers: origin node, edge label, destination node.
    Progress and timing messages are printed to stdout and mirrored to
    ``statics.f_console``.

    Returns a 6-tuple ``(l_in, l_out, l_in_menu, l_out_menu, node_set,
    neighbor)``:

    * ``l_in[des][label]``  -- sorted list of origins reaching ``des`` via
      an edge carrying ``label``.
    * ``l_out[ori][label]`` -- sorted list of destinations reached from
      ``ori`` via an edge carrying ``label``.
    * ``l_in_menu[node]`` / ``l_out_menu[node]`` -- sorted list of the
      labels present in ``l_in[node]`` / ``l_out[node]``.
    * ``node_set`` -- every node id seen as an origin or a destination.
    * ``neighbor[node]`` -- a ``NeighborInfo`` filled from the incoming
      direction only (the outgoing direction is not collected).

    Raises whatever ``open`` raises if the file is unavailable, and
    ``ValueError`` / ``IndexError`` on a malformed non-blank line.
    """
    _report("开始加载文件...")
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # replacement for measuring elapsed intervals.
    t_start_load_file = time.perf_counter()
    l_in, l_out, l_in_menu, l_out_menu, node_set = _read_edges(ROOT_PATH + "data.txt")
    t_end_load_file = time.perf_counter()
    _report("结束加载文件...")
    _report('加载文件耗时 ' + str(t_end_load_file - t_start_load_file))

    t_start_sort_raw_data = time.perf_counter()
    __inner_sort(l_in, l_in_menu)
    __inner_sort(l_out, l_out_menu)
    t_end_sort_raw_data = time.perf_counter()
    _report("原始数据内部排序耗时 " + str(t_end_sort_raw_data - t_start_sort_raw_data))

    _report("开始统计邻居信息")
    t_start_neighbor = time.perf_counter()
    # Only the incoming direction is collected; outgoing neighbours are
    # intentionally left out for now.
    neighbor = _collect_in_neighbors(node_set, l_in)
    t_end_neighbor = time.perf_counter()
    _report("统计邻居信息耗时 " + str(t_end_neighbor - t_start_neighbor))

    return l_in, l_out, l_in_menu, l_out_menu, node_set, neighbor


def _report(message):
    """Print *message* to stdout and mirror it to ``statics.f_console``."""
    print(message)
    print(message, file=statics.f_console)


def _add_edge(adjacency, menu, node, label, other):
    """Record ``other`` under ``adjacency[node][label]`` and track new labels in ``menu[node]``."""
    by_label = adjacency.setdefault(node, {})
    if label not in by_label:
        # First time this label is seen for this node: the dict membership
        # test doubles as the "already in the menu" check, keeping the menu
        # free of duplicates without scanning the list.
        by_label[label] = []
        menu.setdefault(node, []).append(label)
    by_label[label].append(other)


def _read_edges(path):
    """Parse the tab-separated edge file at *path* into adjacency dicts, label menus and the node set."""
    l_in = {}
    l_out = {}
    l_in_menu = {}
    l_out_menu = {}
    node_set = set()
    # The context manager closes the file even when a line fails to parse.
    with open(path) as f:
        for line in f:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            tpl = line.split('\t')
            ori = int(tpl[0])
            edg = int(tpl[1])
            des = int(tpl[2])
            node_set.add(ori)
            node_set.add(des)
            _add_edge(l_out, l_out_menu, ori, edg, des)
            _add_edge(l_in, l_in_menu, des, edg, ori)
    return l_in, l_out, l_in_menu, l_out_menu, node_set


def _collect_in_neighbors(node_set, l_in):
    """Build a ``NeighborInfo`` per node by a breadth-first walk over incoming edges."""
    neighbor = {}
    visited = set()
    # Two queues advanced in lockstep: the node and the depth it was found at.
    q_node = CQueue()
    q_dep = CQueue()
    for node in node_set:
        visited.clear()
        visited.add(node)
        neighbor[node] = NeighborInfo()
        q_node.clear()
        q_dep.clear()
        q_node.put(node)
        q_dep.put(0)
        while not q_node.is_empty():
            x_node = q_node.get()
            x_dep = q_dep.get()
            next_dep = x_dep + 1
            if x_node not in l_in:
                # No incoming edges: nothing to expand from here.
                continue
            for origins in l_in[x_node].values():
                for ori in origins:
                    if ori in visited:
                        continue
                    # The value passed to safe_add is looked up by depth in
                    # statics.fade_factor_pow.
                    neighbor[node].safe_add(ori, statics.fade_factor_pow[next_dep], target='in')
                    visited.add(ori)
                    # Nodes found at depth >= neighbor_threshold are recorded
                    # but not expanded any further.
                    if next_dep < statics.neighbor_threshold:
                        q_node.put(ori)
                        q_dep.put(next_dep)
        neighbor[node].cal_module(target='in')
    return neighbor
def __inner_sort(arr, menu):
    """
    Sort, in place and in ascending order, each node's label list in
    ``menu`` and every node list stored under ``arr[node][label]``.
    """
    for node, labels in menu.items():
        labels.sort()
        if not labels:
            # No labels for this node, so there is nothing in arr to sort.
            continue
        nodes_by_label = arr[node]
        for label in labels:
            nodes_by_label[label].sort()