/
wrapper.py
214 lines (183 loc) · 7.33 KB
/
wrapper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
"""
author: Tao Chen
"""
import collections
import ConfigParser
import getopt
import sys
import time

from solver.method import *
from solver.utils import logger
from solver.subsumption import SubsumptionLattice
from solver.Constraint import LengthConstraint, IfThenConstraint, CostConstraint, process_constraints
# Config file path used by the command-line entry point when no -c/--config
# option is given.
default_parameters = 'config.ini'
# Debug print
def DebugPrint(s):
    """Write ``s`` to standard output, followed by a newline.

    The single-argument call form is used so the statement prints the same
    text under Python 2 and is also valid Python 3 syntax.
    """
    print(s)
# Truncate the scratch log when the module is loaded so that every run starts
# from an empty file.
open("tmp/sergey_tmp_log", "w").close()
def sergeylog(s):
    """Append the string ``s`` verbatim to the scratch log ``tmp/sergey_tmp_log``.

    No newline or other separator is added.
    """
    log_handle = open("tmp/sergey_tmp_log", "a")
    try:
        log_handle.write(s)
    finally:
        log_handle.close()
@logger
def fpMining_pure(inputs):
    """Run the miner selected by ``inputs['type']`` without post-processing.

    Supported types are ``'graph'`` (gSpan), ``'sequence'`` (prefixSpan) and
    ``'itemset'`` (eclat). For any other type an error message is printed and
    the process exits with status 2.

    ``inputs`` is modified in place: ``inputs['data']`` and
    ``inputs['output']`` are prefixed with the miner-specific ``data/<miner>/``
    and ``output/<miner>/`` directories. The prefix is added only when it is
    not already present, so calling this function again with the same dict
    (or after another stage has already expanded the paths) does not produce
    ``data/<miner>/data/<miner>/...``.

    Returns:
        A tuple ``(patterns, number_of_patterns, elapsed_seconds)`` where
        ``patterns`` is the result of ``method.parser(method.mining())`` and
        ``elapsed_seconds`` covers the mining and parsing calls.
    """
    mining_type = inputs['type']
    if mining_type == 'graph':
        subdir, miner_cls = 'gSpan', gSpan
    elif mining_type == 'sequence':
        subdir, miner_cls = 'prefixSpan', prefixSpan
    elif mining_type == 'itemset':
        subdir, miner_cls = 'eclat', eclat
    else:
        print('Does not support "type == %s"!' % inputs['type'])
        sys.exit(2)

    # Expand the relative file names exactly once.
    data_prefix = 'data/%s/' % subdir
    output_prefix = 'output/%s/' % subdir
    if not inputs['data'].startswith(data_prefix):
        inputs['data'] = data_prefix + inputs['data']
    if not inputs['output'].startswith(output_prefix):
        inputs['output'] = output_prefix + inputs['output']

    method = miner_cls(inputs)

    start1 = time.time()
    output = method.mining()
    patterns = method.parser(output)
    end1 = time.time()

    print("\n*************************************")
    print('Number of frequent patterns with constraints (pure exec): %s' % len(patterns))
    return patterns, len(patterns), end1 - start1
@logger
def fpMining_postpro(inputs):
    """Mine frequent patterns, then apply constraints and dominance checking.

    The work is done in three timed steps:

    1. run the miner selected by ``inputs['type']`` (``'graph'`` -> gSpan,
       ``'sequence'`` -> prefixSpan, ``'itemset'`` -> eclat) and parse its
       output;
    2. prune the patterns with ``process_constraints(inputs, patterns)``;
    3. filter the survivors with ``dominance_check(inputs, pruned)``.

    ``inputs['data']`` and ``inputs['output']`` are expanded in place with the
    miner-specific directory prefixes unless ``inputs['data']`` already
    contains ``'data/'`` (e.g. because an earlier stage expanded them).

    For the sequence and itemset miners the miner receives a shallow copy of
    ``inputs`` whose ``'dominance'`` entry is blanked; the original ``inputs``
    keeps its value for step 3. A config without a ``'dominance'`` key is
    passed through as a plain copy instead of failing on an unbound local.
    NOTE(review): blanking presumably makes the miner return all frequent
    patterns so dominance is decided in step 3 only -- confirm against the
    miner implementations.

    An unsupported type prints an error message and exits with status 2.

    Returns:
        ``(final_patterns, n_mined, n_final, t_step1, t_step2, t_step3)``
        where ``final_patterns`` is a list and the ``t_*`` values are the
        elapsed seconds of the three steps.
    """
    mining_type = inputs['type']
    if mining_type == 'graph':
        subdir, miner_cls, blank_dominance = 'gSpan', gSpan, False
    elif mining_type == 'sequence':
        subdir, miner_cls, blank_dominance = 'prefixSpan', prefixSpan, True
    elif mining_type == 'itemset':
        subdir, miner_cls, blank_dominance = 'eclat', eclat, True
    else:
        print('Does not support "type == %s"!' % inputs['type'])
        sys.exit(2)

    # Only expand the paths once; an already expanded data path means the
    # output path was expanded together with it.
    if 'data/' not in inputs['data']:
        inputs['data'] = 'data/%s/' % subdir + inputs['data']
        inputs['output'] = 'output/%s/' % subdir + inputs['output']

    if blank_dominance:
        miner_inputs = dict(inputs)
        if 'dominance' in miner_inputs:
            miner_inputs['dominance'] = ''
    else:
        miner_inputs = inputs
    method = miner_cls(miner_inputs)

    # step 1 time cost
    start1 = time.time()
    output = method.mining()
    patterns = method.parser(output)
    end1 = time.time()
    print("# of patterns %s" % len(patterns))

    # step 2 time cost
    start2 = time.time()
    patterns_pruned = process_constraints(inputs, patterns)
    # A falsy result (None or empty) is normalised to an empty list.
    patterns_pruned = list(patterns_pruned) if patterns_pruned else []
    end2 = time.time()
    print("# of constrained patterns %s" % len(patterns_pruned))

    # step 3 time cost
    start3 = time.time()
    final_patterns = dominance_check(inputs, patterns_pruned)
    final_patterns = list(final_patterns) if final_patterns else []
    print("# of dominance patterns %s" % len(final_patterns))
    end3 = time.time()

    print("step1: {0:.4f}, step2: {1:.4f}, step3: {2:.4f}".format(end1 - start1, end2 - start2, end3 - start3))
    return final_patterns, len(patterns), len(final_patterns), end1 - start1, end2 - start2, end3 - start3
def dominance_check(params, patterns):
    """Filter ``patterns`` through a freshly created ``SubsumptionLattice``.

    Delegates to ``SubsumptionLattice.check_dominance(patterns, params)`` and
    returns its result unchanged.
    """
    return SubsumptionLattice().check_dominance(patterns, params)
if __name__ == "__main__":
    # Command-line entry point: parse the options, load the INI config file,
    # build the constraint objects and run the mining pipeline.
    usage = 'wrapper.py -c <configfile> -s <True or False>\nSet input data and output file in config file'

    # deal with command parameters
    if len(sys.argv) < 2:
        print('Needs input file\n<wrapper.py -h> for help!')
        sys.exit(2)

    config_file = default_parameters  # config file path
    specialised = False
    params = {}  # Dict to store input parameters

    try:
        # --help takes no argument; --config and --specialised each take one.
        opts, args = getopt.getopt(sys.argv[1:], 'hc:s:', ['help', 'config=', 'specialised='])
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)

    for opt, arg in opts:
        if opt in ('-h', '--help'):
            print(usage)
            sys.exit(2)
        elif opt in ('-c', '--config'):
            config_file = arg
        elif opt in ('-s', '--specialised'):
            if arg in ('true', 'True', 'T'):
                specialised = True

    # read parameters from config file
    config = ConfigParser.ConfigParser()
    # read() returns the list of files it could parse; an empty list means
    # the config file is missing or unreadable.
    if not config.read(config_file):
        print('Cannot read config file: %s' % config_file)
        sys.exit(2)

    # read basic parameters
    section = 'Parameters'
    for option in config.options(section):
        try:
            params[option] = config.get(section, option)
        except ConfigParser.Error:
            print("exception on %s!" % option)
            params[option] = None
        else:
            # Values come back as strings, so compare against '-1'.
            if params[option] == '-1':
                DebugPrint("skip: %s" % option)
    print('Parameters: %s' % params)

    # read constraints (the section is optional)
    section = 'Constraints'
    params['constraints'] = dict()
    constraint_options = config.options(section) if config.has_section(section) else []
    for option in constraint_options:
        if option == 'length':
            max_len = int(config.get(section, option))
            params['constraints']['length'] = LengthConstraint(max_len)
        elif option == 'ifthen':
            # Format: "<pre>;<post>", both integers.
            pre, post = config.get(section, option).split(';')
            params['constraints']['ifthen'] = IfThenConstraint(int(pre), int(post))
        elif option == 'cost':
            # Format: "<id>:<cost>;...;<label>:<max_cost>" -- every entry but
            # the last maps an integer id to an integer cost, and the last
            # entry carries the maximum cost after its final ':'.
            cost_mapping = collections.defaultdict(int)
            costs = config.get(section, option).split(';')
            costs, max_cost = costs[:-1], costs[-1].split(':')[-1]
            for entry in costs:
                item_id, cost = entry.split(':')
                cost_mapping[int(item_id)] = int(cost)
            params['constraints']['cost'] = CostConstraint(int(max_cost), cost_mapping)
        else:
            print("Does not support this type of constraint: %s" % option)

    # frequent pattern mining
    if specialised:
        patterns, _, time_pure = fpMining_pure(params)
    final_patterns, _1, _2, time_step1, time_step2, time_step3 = fpMining_postpro(params)

    # NOTE: writing the resulting patterns to params['output'] is currently
    # disabled; only the summary printed by the mining functions is shown.
    print("\n*************************************")