-
Notifications
You must be signed in to change notification settings - Fork 2
/
parseRfPred.py
executable file
·112 lines (97 loc) · 3.7 KB
/
parseRfPred.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import sys
import json
import math
import fiedler
import os.path
def parserow(line):
    """Parse one comma-separated ``KEY=VALUE`` record into a dict.

    The line is split on commas.  A piece containing exactly one double
    quote opens (or closes) a quoted value, and the pieces in between are
    glued back together with commas, so ``KEY="a,b"`` yields ``"a,b"``.
    Commas in *unquoted* values still split the value.

    Each clause is split on its first ``=`` only, so values that contain
    ``=`` are kept intact.  Clauses without any ``=`` are ignored.  Keys and
    values are stripped of surrounding whitespace and double quotes.  A
    quoted value that is never closed is dropped.

    Returns a dict mapping key strings to value strings; if a key repeats,
    the last occurrence wins.
    """
    clauses = []
    pending = []  # pieces of a quoted value that was split on its commas
    for term in line.strip().split(","):
        quotes = term.count('"')
        if quotes == 1 or pending:
            pending.append(term)
        else:
            clauses.append(term)
        # A second lone quote terminates the quoted run started earlier.
        if quotes == 1 and len(pending) > 1:
            clauses.append(",".join(pending))
            pending = []

    fields = {}
    for clause in clauses:
        # partition() splits on the first "=" only, so "=" inside a value
        # no longer causes the whole clause to be discarded.
        key, sep, value = clause.partition("=")
        if not sep:
            continue
        fields[key.strip().strip('"').strip()] = value.strip().strip('"').strip()
    return fields
def parseRfPredict(fo, cutoff):
    """Build a weighted splitter-to-parent edge list from a forest dump.

    ``fo`` is an iterable of text lines.  Lines starting with ``TREE`` begin
    a new tree and supply its ``TREE`` id and ``TARGET``.  Lines starting
    with ``NODE`` that carry a ``SPLITTER`` key add one count to the edge
    from that splitter to its parent: the splitter recorded for the node id
    with its last character removed, or the tree's ``TARGET`` when that
    shortened id is empty (a root node).  All other lines are ignored.

    Only edges whose count is strictly greater than ``cutoff`` are emitted.

    Returns a 4-tuple ``(out, intidsbyname, namesbyintid,
    spliter_by_feature)``:

    * ``out`` -- list of ``[source_id, target_id, count]`` rows,
    * ``intidsbyname`` -- dict mapping a name to its integer id,
    * ``namesbyintid`` -- list mapping an integer id back to its name,
    * ``spliter_by_feature`` -- dict mapping a splitter name to the list of
      parsed NODE rows that used it (each row gains ``"parents"`` and
      ``"treeid"`` entries).

    Any error raised while handling a line is reported on stdout with the
    line number and then re-raised.
    """
    adj_hash = {}
    spliter_by_feature = {}
    predicted = "UNKNOWN"
    treeid = 0
    # Maps node id -> splitter name for the current tree; "" is the
    # pseudo-parent of root nodes and holds the tree's target.
    parents = {"": predicted}
    for i, line in enumerate(fo):
        try:
            if line.startswith("TREE"):
                vhash = parserow(line)
                treeid = vhash["TREE"]
                # Read the target BEFORE seeding the root entry so that this
                # tree's roots link to this tree's TARGET.  (Previously the
                # roots were linked to the previous tree's target, or to
                # "UNKNOWN" for the first tree.)
                predicted = vhash["TARGET"]
                parents = {"": predicted}
            elif line.startswith("NODE"):
                vhash = parserow(line)
                if "SPLITTER" not in vhash:
                    # No splitter on this node: it contributes no edge.
                    continue
                source = vhash["SPLITTER"]
                node = vhash["NODE"]
                parents[node] = source
                # Splitters of every proper, non-empty prefix of the node
                # id, nearest ancestor first.
                vhash["parents"] = [parents[node[:-n]]
                                    for n in range(1, len(node))]
                vhash["treeid"] = treeid
                spliter_by_feature.setdefault(source, []).append(vhash)
                edges = adj_hash.setdefault(source, {})
                target = parents[node[:-1]]
                edges[target] = edges.get(target, 0.0) + 1.0
        except Exception:
            print("Error parsing line %s: %s\nparents:%s" % (i, line, parents))
            raise

    out = []
    intidsbyname = {}
    namesbyintid = []
    for source, edges in adj_hash.items():
        for target, weight in edges.items():
            if weight > cutoff:
                # Assign dense integer ids in order of first appearance.
                for strid in (source, target):
                    if strid not in intidsbyname:
                        intidsbyname[strid] = len(namesbyintid)
                        namesbyintid.append(strid)
                out.append([intidsbyname[source], intidsbyname[target],
                            float(weight)])
    return (out, intidsbyname, namesbyintid, spliter_by_feature)
def main():
    """Command-line entry point: ``parseRfPred.py FOREST_FILE [CUTOFF]``.

    Parses FOREST_FILE with ``parseRfPredict`` (CUTOFF defaults to 1.0),
    passes the resulting edge list to ``fiedler.fiedler`` with plotting
    disabled and ``n_fied=2``, adds the edge list and the name/id lookup
    tables to the returned dict under ``"adj"``, ``"iByn"`` and ``"nByi"``,
    and writes it as JSON to ``<basename>.cutoff.<CUTOFF>.json`` in the
    current working directory.  The per-splitter table returned by
    ``parseRfPredict`` is not written out.
    """
    if len(sys.argv) < 2:
        sys.exit("usage: %s FOREST_FILE [CUTOFF]" % sys.argv[0])
    fn = sys.argv[1]
    cutoff = 1.0
    if len(sys.argv) > 2:
        cutoff = float(sys.argv[2])

    # Context managers guarantee the handles are closed even if parsing or
    # serialisation raises.
    with open(fn) as fo:
        adj_list, iByn, nByi, _ = parseRfPredict(fo, cutoff)

    fied = fiedler.fiedler(adj_list, fn=fn, plot=False, n_fied=2)
    fied["adj"] = adj_list
    fied["iByn"] = iByn
    fied["nByi"] = nByi

    outfn = os.path.basename(fn) + ".cutoff." + str(cutoff) + ".json"
    with open(outfn, "w") as fo:
        print("Outputing fiedler results for %s to %s"
              % (os.path.abspath(fn), os.path.abspath(outfn)))
        json.dump(fied, fo, indent=2)
# Run the command-line entry point only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()