def run_program(self, dataset, parameters):
    """Mine semantic association rules from ``dataset`` in a single process.

    Args:
        dataset: Pair unpacked as ``(kg_i, kg_s)``. ``kg_i`` feeds item-set
            generation and rule scoring; both graphs are handed to the rule
            generator.
        parameters: Mapping that is logged in full. The keys read here are
            ``similarity_threshold``, ``max_cbs_size``,
            ``minimal_local_support``, ``minimal_support`` and
            ``minimal_confidence``.

    Returns:
        List of ``(rule, support, confidence)`` tuples that reach both
        minimums, ordered by descending confidence and then by descending
        support.
    """
    parameter_lines = ["\t{}: {}".format(name, value)
                       for name, value in parameters.items()]
    self.logger.info(
        "Starting run\nParameters:\n{}".format("\n".join(parameter_lines)))

    kg_i, kg_s = dataset

    started = timer()

    # Pipeline: semantic item sets -> common behaviour sets -> candidate rules.
    item_sets = generate_semantic_item_sets(kg_i)
    behaviour_sets = generate_common_behaviour_sets(
        item_sets,
        parameters["similarity_threshold"],
        parameters["max_cbs_size"])
    candidates = generate_semantic_association_rules(
        kg_i,
        kg_s,
        behaviour_sets,
        parameters["minimal_local_support"])

    # Score every candidate and keep only those reaching both minimums.
    scored_rules = []
    for candidate in candidates:
        support = support_of(kg_i, candidate)
        confidence = confidence_of(kg_i, candidate)
        meets_minimums = (
            support >= parameters["minimal_support"]
            and confidence >= parameters["minimal_confidence"])
        if meets_minimums:
            scored_rules.append((candidate, support, confidence))

    # Index 2 is the confidence and index 1 the support, so confidence is
    # the primary sort key and support breaks ties.
    ranked_rules = sorted(scored_rules, key=itemgetter(2, 1), reverse=True)

    elapsed = timer() - started
    # NOTE(review): the label says "ms" but the value is printed unscaled; if
    # `timer` is timeit.default_timer this is seconds - confirm and fix.
    print(" Program completed in {:.3f} ms".format(elapsed))
    print(" Found {} rules".format(len(ranked_rules)))

    return ranked_rules
def run_program(self, dataset, parameters):
    """Mine semantic association rules from ``dataset`` using worker processes.

    The run has four stages: pairwise common behaviour sets, repeated
    extension of those sets, rule generation, and rule evaluation. Every
    stage except the extension hands slices to ``NUM_OF_WORKERS`` daemon
    processes through a manager queue; the extension stage uses a ``Pool``.

    Args:
        dataset: Pair unpacked as ``(kg_i, kg_s)``.
        parameters: Mapping that is logged in full. The keys read here are
            ``similarity_threshold``, ``max_cbs_size``,
            ``minimal_local_support``, ``minimal_support`` and
            ``minimal_confidence``.

    Returns:
        A plain ``list`` (not a manager proxy) of the entries collected by
        ``evaluate_rules``, sorted descending on item 2 and then item 1.
        Presumably these are ``(rule, support, confidence)`` tuples as in the
        sequential implementation - confirm against ``evaluate_rules``.
    """

    def run_workers(target, args, work, slices):
        """Run ``target(*args)`` in NUM_OF_WORKERS processes fed by ``work``."""
        workers = []
        for _ in range(NUM_OF_WORKERS):
            worker = Process(target=target, args=args)
            worker.daemon = True
            worker.start()
            workers.append(worker)

        for slce in slices:
            work.put(slce)
        # One None per worker follows the real work items; presumably each
        # worker treats it as its signal to stop - confirm in the targets.
        for _ in workers:
            work.put(None)

        for worker in workers:
            worker.join()

    def even_slices(length):
        """Cut ``range(length)`` into slices of about length / NUM_OF_WORKERS."""
        size = max(1, length // NUM_OF_WORKERS)
        return [slice(start, start + size)
                for start in range(0, length, size)]

    self.logger.info("Starting run\nParameters:\n{}".format("\n".join(
        ["\t{}: {}".format(k, v) for k, v in parameters.items()])))
    self.logger.info(
        "Distributing load over {} cores".format(NUM_OF_WORKERS))

    kg_i, kg_s = dataset

    t0 = timer()

    # The manager server process is shut down when this block exits, so
    # nothing that leaves the block may still be a proxy.
    with Manager() as manager:
        # generate semantic item sets from sampled graph
        si_sets = manager.dict(generate_semantic_item_sets(kg_i))

        # generate common behaviour sets
        work = manager.Queue()
        keys = list(si_sets.keys())
        slices = self.diagonal_matrix_slicer(keys)
        cbs_sets = manager.list()
        run_workers(
            generate_common_behaviour_sets,
            (si_sets, cbs_sets, work, parameters["similarity_threshold"]),
            work,
            slices)

        # extend common behaviour sets; each round works on the sets produced
        # by the previous round and the size bound doubles per round
        cbs_size = 2
        cbs_sets_extended = manager.list(cbs_sets)
        while cbs_size < parameters["max_cbs_size"]:
            func = partial(extend_common_behaviour_sets,
                           cbs_sets_extended,
                           parameters["similarity_threshold"])
            slices = self.diagonal_matrix_slicer(cbs_sets_extended)
            cbs_sets_extension = manager.list()
            with Pool(processes=NUM_OF_WORKERS) as worker_pool:
                for cbs_subset in worker_pool.imap_unordered(func, slices):
                    cbs_sets_extension.extend(cbs_subset)

            cbs_sets.extend(cbs_sets_extension)
            cbs_sets_extended = cbs_sets_extension
            cbs_size *= 2

        # generate semantic association rules
        rules = manager.list()
        work = manager.Queue()
        run_workers(
            generate_semantic_association_rules,
            (kg_i, kg_s, cbs_sets, work, rules,
             parameters["minimal_local_support"]),
            work,
            even_slices(len(cbs_sets)))

        # calculate support and confidence, skip those not meeting minimum
        # requirements
        evaluated = manager.list()
        work = manager.Queue()
        run_workers(
            evaluate_rules,
            (kg_i, rules, work, evaluated,
             parameters["minimal_support"],
             parameters["minimal_confidence"]),
            work,
            even_slices(len(rules)))

        # Copy the shared list out in one request and sort locally, so the
        # caller gets an ordinary list that outlives the manager.
        final_rule_set = sorted(
            evaluated[:], key=itemgetter(2, 1), reverse=True)

        # Stop the clock before the manager shuts down.
        t1 = timer()

    dt = t1 - t0
    # NOTE(review): the label says "ms" but the value is printed unscaled; if
    # `timer` is timeit.default_timer this is seconds - confirm and fix.
    print(" Program completed in {:.3f} ms".format(dt))
    print(" Found {} rules".format(len(final_rule_set)))

    return final_rule_set