def _add_temporal_basics_dump(self, dL_mean, dL_var, node_cpds, v, e):
    ''' Basic part that is the same for all cases.

    Note to myself:
    Per discrete parent combination - has its own mean and its own variance
    Per continuous parent           - no separate distributions, because the
                                      contributions simply combine linearly

    Type definition: http://pythonhosted.org/libpgm/CPDtypes.html
    If I am continuous:
        - if I have only continuous parents, or no parents at all -> type: lg
        - if I have discrete parents, or discrete and continuous  -> type: lgandd
    If I am discrete:
        - if I have no parents or only discrete parents -> type: discrete
        - if I have continuous parents                  -> type: "to be implemented" - do it yourself
    '''
    # 1. Require the initial node of each temporal variable to be at t=0
    # 2. tp and dL information needs to be given - it is distributed in any case
    #    (currently the same for all nodes - in reality distinct per node)
    for vert in v:
        node_cpds[vert]["dL_mean"] = dL_mean
        node_cpds[vert]["dL_var"] = dL_var

    # Generate network from this information
    skel = GraphSkeleton()
    skel.V = v
    skel.E = e
    try:
        skel.toporder()
    except Exception:
        print("Warning -> Graph has cycles -> may be irrelevant depending on use-case")
    return skel, node_cpds
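# A minimal sketch of the type-selection rule described in the docstring above;
# the helper name and its arguments are hypothetical, only the returned strings
# are the libpgm CPD types the docstring refers to.
def _cpd_type(node_is_discrete, parent_types):
    """Select the libpgm CPD type for a node from its parents' types."""
    has_discrete_parent = "discrete" in parent_types
    if not node_is_discrete:
        # continuous node: lg unless at least one parent is discrete
        return "lgandd" if has_discrete_parent else "lg"
    if any(t in ("lg", "lgandd") for t in parent_types):
        # discrete node with continuous parents: "to be implemented"
        raise NotImplementedError("discrete node with continuous parents")
    return "discrete"

# e.g. a continuous node with one discrete and one continuous parent:
assert _cpd_type(False, ["discrete", "lg"]) == "lgandd"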
def generate_model(self, structure_specification):
    # 1. create defined number of nodes per object
    v, node_cpds, temp_dict = self._create_nodes(structure_specification)

    # 2. create edges incl. time between vertices
    e, temp_gap_dict = self._create_edges(v, structure_specification, temp_dict)

    # 3. add temporal information
    inverted_temp_dict = self._invert_dict(temp_dict)
    # node_cpds is passed by reference and carries the temporal information afterwards
    self._temporal_information(v, temp_gap_dict, node_cpds,
                               self._parents_dict_from_edges(e), temp_dict,
                               inverted_temp_dict)

    # 4. skeleton
    skel = GraphSkeleton()
    skel.V = v
    skel.E = e
    skel.toporder()

    # 5. create model
    tbn = TSCBN("", skel, node_cpds, unempty=True, forbid_never=True,
                discrete_only=True)  # discrete case - continuous nodes later

    # 6. set CPDs of value nodes
    self._set_cpds(tbn, structure_specification)
    return tbn
def _add_temporal_basics(self, dL_mean, dL_var, node_cpds, v, e, defaults):
    # tp and dL information needs to be given - it is distributed in any case
    # (currently the same for all nodes - in reality distinct per node);
    # nodes with defaults are pinned to t=0 with zero variance
    for vert in v:
        if vert in defaults:
            node_cpds[vert]["dL_mean"] = 0
            node_cpds[vert]["dL_var"] = 0
        else:
            node_cpds[vert]["dL_mean"] = dL_mean
            node_cpds[vert]["dL_var"] = dL_var

    # Generate network from this information
    skel = GraphSkeleton()
    skel.V = v
    skel.E = e
    skel.toporder()
    return skel, node_cpds
def generate_model(self, structure_specification):
    ''' Dynamic Bayesian network. '''
    # 1. define static structure
    ndata, edges, first_time, last_time, resolution = self._create_nodes_edges(structure_specification)

    # 2. reduce the unrolled structure to its per-signal template and extract raw parameters
    edges = set([tuple([q.split("_")[0] for q in e]) for e in edges])
    nodes = set([q.split("_")[0] for q in ndata])
    ndata_reduced = dict([(k, ndata[k + "_0"]) for k in nodes])
    node_data, intensity_params, transition_params, _, _ = self.get_cpd_raw(nodes, edges, ndata_reduced)

    # 3. learn transition matrices given the condition: for every edge pointing at me
    #    I have one matrix with my values, e.g. [0.2, 0.5, 0.3] | ParA=0, ParB=1 -
    #    i.e. per parent-value combination create a matrix of my transition values,
    #    as in a BN
    # 4. learn intensity matrices: one X x X matrix per parent-value combination,
    #    where X is the number of values this variable can take
    #    i.e. model = nodes, edges + transition matrices + intensity matrices
    self.eval_cpd_entries = transition_params + intensity_params

    # 5. store the parameters on the skeleton
    nd = NodeData()
    nd.Vdata = ndata
    skel = GraphSkeleton()
    skel.E = edges
    skel.V = nodes
    skel.alldata = dict()
    skel.alldata["eval_cpd_entries_count"] = self.eval_cpd_entries
    skel.alldata["ndata"] = ndata_reduced  # states = transition matrix (= dim_x + dim_y) + intensity
    return skel
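# A minimal sketch (all names and values hypothetical) of the parameter layout the
# comments above describe: per parent-value combination one row-stochastic
# transition matrix, and one X x X intensity matrix whose rows sum to zero.
import numpy as np

X = 3  # number of states this variable can take (assumption)
parent_combos = ["['0', '1']", "['1', '1']"]  # libpgm-style string keys (assumption)

# one transition matrix per parent-value combination; each row sums to 1
transition = {key: np.full((X, X), 1.0 / X) for key in parent_combos}

# one intensity matrix per parent-value combination: off-diagonal entries are
# jump rates i -> j, the diagonal is minus the row sum, so each row sums to 0
intensity = {}
for key in parent_combos:
    Q = np.ones((X, X))
    np.fill_diagonal(Q, 0.0)
    np.fill_diagonal(Q, -Q.sum(axis=1))
    intensity[key] = Q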
def generate_model(self, structure_specification):
    ''' Dynamic Bayesian network. '''
    # 1. define static structure
    ndata, edges, first_time, last_time, resolution = self._create_nodes_edges(structure_specification)

    # 2. learn parameters from it
    nd = NodeData()
    nd.Vdata = ndata
    skel = GraphSkeleton()
    skel.E = edges
    skel.V = list(ndata.keys())
    skel.toporder()

    bn = DBNDiscrete(skel, nd)
    bn.start_time = 0.0  # first_time
    bn.end_time = last_time
    bn.resolution = resolution
    if self.EXPLICIT_DISABLING:
        bn.eval_cpd_entries = self.eval_cpd_entries
    return bn
def _estimate_tscbn(self, sequences, debug):
    repeat_frequency = self.sampling_frequency

    # 1. per sequence, build the trees from which valid state combinations are
    #    drawn (repeat_frequency times below)
    per_seq_trees, per_seq_initial_states = self._extract_sample_trees(sequences)

    # clean skeleton: drop the dL_ helper nodes
    new_E = []
    for ed in self.tbn.skeleton.E:
        if not ed[0].startswith("dL_") and not ed[1].startswith("dL_"):
            new_E.append(ed)
    new_V = []
    for ver in self.tbn.skeleton.V:
        if not ver.startswith("dL_"):
            new_V.append(ver)
    skel = GraphSkeleton()
    skel.E = new_E
    skel.V = new_V

    # TODO: do this only if the sequence is not complete
    ping = time.perf_counter()  # time.clock() was removed in Python 3.8
    delta_t_distribution = {}
    for i in range(repeat_frequency):
        if i % 50 == 0:
            print(str(i) + "/" + str(self.sampling_frequency))
        # this can be made parallel
        for seq_count in range(len(sequences)):
            try:
                trees = per_seq_trees[seq_count]
            except (KeyError, IndexError):
                continue  # acceptable, as this sequence is probably invalid
            initial_states = per_seq_initial_states[seq_count]

            # draw a valid sample - output: V0_0 = ... dL_...
            seq, delta_t_distribution = self.draw_valid_sample(initial_states, trees, delta_t_distribution)
    print("Estimation %s" % str(time.perf_counter() - ping))

    # normalize
    for v in self.tbn.V:
        if v.startswith("dL_"):
            continue
        if isinstance(self.tbn.Vdata[v]["cprob"], dict):
            for k in self.tbn.Vdata[v]["cprob"]:
                self.tbn.Vdata[v]["cprob"][k] /= np.sum(self.tbn.Vdata[v]["cprob"][k])
        else:
            s = np.sum(self.tbn.Vdata[v]["cprob"])
            if s == 0:  # uniform if never seen
                self.tbn.Vdata[v]["cprob"] = np.ones(len(self.tbn.Vdata[v]["cprob"])) / float(len(self.tbn.Vdata[v]["cprob"]))
            else:
                self.tbn.Vdata[v]["cprob"] /= s

    # compute time: do a norm fit on the last run, then simply aggregate all
    # Gaussians - CAREFUL: this must happen conditioned on the parent
    # combination (hence the nested keys below)
    for k in delta_t_distribution:
        for j in delta_t_distribution[k]:
            mean, std = norm.fit(delta_t_distribution[k][j])
            var = std * std
            if var == 0:
                var = 0.02  # otherwise everything except the exact value has probability zero
            mean_scale = [1] * len(self.tbn.Vdata[k]["parents"])
            self.tbn.Vdata[k]["hybcprob"][j] = {'variance': var, 'mean_base': mean, 'mean_scal': mean_scale}
    return self.tbn
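# A minimal sketch (entry values and helper name hypothetical) of how a
# libpgm-style "lgandd" entry like the one stored above is consumed: the
# discrete parent combination selects the entry, and the conditional mean is
# mean_base plus the mean_scal-weighted sum of the continuous parent values.
import numpy as np

entry = {'variance': 0.02, 'mean_base': 0.7, 'mean_scal': [1, 1]}

def conditional_gaussian(entry, continuous_parent_values):
    mean = entry['mean_base'] + float(np.dot(entry['mean_scal'], continuous_parent_values))
    return mean, entry['variance']

mean, var = conditional_gaussian(entry, [0.5, 0.3])  # e.g. the parents' dL values
sample = np.random.normal(mean, np.sqrt(var))        # draw one temporal gap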
def run_structure_experiment(target_path, parameter_temp_nodes_experiment=False,
                             parameter_signals_experiment=False,
                             comparison_experiment_temp_nodes=False,
                             comparison_experiment_signals=False,
                             comparison_experiment_scp=False):
    # number of iterations per experiment
    iterations = 25
    # number of sequences per experiment
    sample_size = 5000

    # ----------------------------------------------------------------------------------------
    # Structure Generator Setup
    # ----------------------------------------------------------------------------------------
    sg = StructureGenerator(test_type=TestStructureEnum.SPECIFICATION)
    sg.add_base_structure_models([TSCBNStructureModel])
    sg.reference_model = TSCBNStructureModel

    # TIME SETTINGS (fixed for all experiments)
    sg.set_temporal_range(min_per_object_gap=0.5, max_per_object_gap=1.0)
    sg.set_temporal_variance(0.001)
    sg.set_dbn_tolerance(0.1)

    # PROBABILITY SETTINGS (fixed for all experiments)
    sg.set_state_change_probability(min_probability=0.95, max_probability=0.95)

    # ----------------------------------------------------------------------------------------
    # Experiments with different parameters of the SBTreeDiscoverer
    # ----------------------------------------------------------------------------------------
    if parameter_temp_nodes_experiment or parameter_signals_experiment:
        # filtering parameters fixed at 0.1; the parent-graph approach performs exact
        # score optimization (but not exhaustively); structure optimization iterations
        # are not run in parallel
        sd = SBTreeDiscoverer(min_out_degree=0.1, k_infrequent=0.1,
                              approach='parent_graph', parallel=False)

        for edges_per_object in [1, 3]:
            print('edges_per_object: ' + str(edges_per_object) + '...')
            L().log.info('edges_per_object: ' + str(edges_per_object) + '...')

            # EDGE SETTINGS
            sg.set_connection_ranges(min_edges_per_object=edges_per_object,
                                     max_edges_per_object=edges_per_object,
                                     min_percent_inter=1.0, max_percent_inter=1.0)

            if parameter_temp_nodes_experiment:
                # 1st experiment: increase the number of temporal variables per signal

                # EVALUATOR SETUP
                ev = StructureEvaluator(True)
                ev.set_output_path(os.path.join(target_path, r"structure_eval_%s.csv"
                                                % strftime("%Y_%m_%d-%H_%M_%S", localtime())))
                metrics = ["add-edges", "del-edges", "num-add-edges", "num-del-edges", "shd",
                           "add-edges-skel", "del-edges-skel", "num-add-edges-skel",
                           "num-del-edges-skel", "shd-skel", "kld", "execution-time",
                           "psi-execution-time", "so-execution-time"]
                for metric in metrics:
                    ev.add_metric(metric)

                eval_results = dict()
                discovery_algorithms = set()
                for number_of_signals in [2, 3, 4]:
                    print('number_of_signals: ' + str(number_of_signals) + '...')
                    L().log.info('number_of_signals: ' + str(number_of_signals) + '...')
                    if edges_per_object >= number_of_signals:
                        continue
                    numbers_of_temp_nodes = [1, 2, 3, 4, 5, 6, 7]
                    for number_of_temp_nodes in numbers_of_temp_nodes:
                        print('number_of_temp_nodes: ' + str(number_of_temp_nodes) + '...')
                        L().log.info('number_of_temp_nodes: ' + str(number_of_temp_nodes) + '...')

                        # NODE SETTINGS
                        sg.set_node_range(min_objects=number_of_signals,
                                          max_objects=number_of_signals,
                                          min_temp_nodes=number_of_temp_nodes,
                                          max_temp_nodes=number_of_temp_nodes,
                                          min_states=3, max_states=3)
                        eval_results.update({number_of_temp_nodes: dict()})

                        for iteration in range(iterations):
                            print('iteration: ' + str(iteration) + '...')
                            L().log.info('iteration: ' + str(iteration) + '...')

                            # SAMPLE DATA
                            models, specifications = sg.run_next_testcase()
                            in_seq = models[sg.reference_model.__name__].randomsample(sample_size, {})
                            sequences = sequences_to_intervals(
                                in_seq, models[sg.reference_model.__name__].Vdata, False)[0]

                            # additional information for evaluation
                            additional_infos = dict()
                            additional_infos[sg.reference_model.__name__] = {'execution_time': 0.0,
                                                                             'data': None}

                            for score in ['BIC', 'AIC', 'Bdeu', 'K2']:
                                print('score: ' + str(score) + '...')
                                L().log.info('score: ' + str(score) + '...')
                                for temporal_threshold in np.arange(0.0, 2.5, 0.5):
                                    print('temporal_threshold: ' + str(temporal_threshold) + '...')
                                    L().log.info('temporal_threshold: ' + str(temporal_threshold) + '...')

                                    # STRUCTURE DISCOVERER SETUP
                                    sd.score = score
                                    sd.max_time_difference = temporal_threshold
                                    sd_name = 'SBTreeDiscoverer_' + score + '_TH_' + str(temporal_threshold)
                                    if sd_name not in eval_results.get(number_of_temp_nodes):
                                        # initialise metrics_dict
                                        metrics_dict = dict((metric, []) for metric in metrics)
                                        eval_results.get(number_of_temp_nodes).update({sd_name: metrics_dict})
                                    discovery_algorithms.add(sd_name)
                                    model_name = sd_name + ' (' + str(iteration) + ')'

                                    # RUN ALGORITHM
                                    L().log.info('----------------------------------------------------------')
                                    print('Run approach ' + model_name + '.')
                                    L().log.info('Run approach ' + model_name + '.')
                                    ping = perf_counter()  # time.clock() was removed in Python 3.8
                                    nodes, edges = sd.discover_structure(sequences)
                                    L().log.info('Nodes: ' + str(nodes))
                                    L().log.info('Edges: ' + str(edges))
                                    execution_time = perf_counter() - ping
                                    additional_infos[model_name] = {
                                        'execution_time': execution_time, 'data': sd.data,
                                        'psi_execution_time': sd.parent_set_identification_time,
                                        'so_execution_time': sd.structure_optimization_time}
                                    L().log.info('Execution time: ' + str(execution_time))
                                    L().log.info('----------------------------------------------------------')

                                    # CREATE TSCBN
                                    skel = GraphSkeleton()
                                    skel.V = nodes
                                    skel.E = edges
                                    skel.toporder()
                                    model = TSCBN("", skel, models[sg.reference_model.__name__].Vdata,
                                                  unempty=True, forbid_never=True, discrete_only=True)

                                    # EVALUATION
                                    eval_result = ev.evaluate(model_dict={model_name: model},
                                                              reference=models[sg.reference_model.__name__],
                                                              additional_infos=additional_infos)
                                    ev.print_eval_results(eval_results=eval_result,
                                                          specs=specifications, to_csv=True)
                                    for metric, value in eval_result[model_name].items():
                                        eval_results[number_of_temp_nodes][sd_name][metric].append(value)

                    experiment_name = 'ParameterTmpNodesExperiment_EPO_' + str(edges_per_object) + \
                                      '_Sig_' + str(number_of_signals)
                    relevant_metrics = ["num-add-edges", "num-del-edges", "shd",
                                        "num-add-edges-skel", "num-del-edges-skel", "shd-skel",
                                        "kld", "execution-time", "psi-execution-time",
                                        "so-execution-time"]
                    write_pgfplots_data(experiment_name, eval_results, relevant_metrics,
                                        discovery_algorithms, numbers_of_temp_nodes,
                                        'number_of_temp_nodes', target_path)

            if parameter_signals_experiment:
                # 2nd experiment: increase the number of signals
                if edges_per_object == 3:
                    continue  # TODO: remove this, when choosing a maximal number of signals larger than 5

                # EVALUATOR SETUP
                ev = StructureEvaluator(True)
                ev.set_output_path(os.path.join(target_path, r"structure_eval_%s.csv"
                                                % strftime("%Y_%m_%d-%H_%M_%S", localtime())))
                metrics = ["add-edges", "del-edges", "num-add-edges", "num-del-edges", "shd",
                           "add-edges-skel", "del-edges-skel", "num-add-edges-skel",
                           "num-del-edges-skel", "shd-skel", "kld", "execution-time",
                           "psi-execution-time", "so-execution-time"]
                for metric in metrics:
                    ev.add_metric(metric)

                eval_results = dict()
                discovery_algorithms = set()
                for number_of_temp_nodes in [3, 5]:
                    print('number_of_temp_nodes: ' + str(number_of_temp_nodes) + '...')
                    L().log.info('number_of_temp_nodes: ' + str(number_of_temp_nodes) + '...')
                    numbers_of_signals = [2, 3, 4, 5]
                    evaluated_numbers_of_signals = copy.deepcopy(numbers_of_signals)
                    for number_of_signals in numbers_of_signals:
                        print('number_of_signals: ' + str(number_of_signals) + '...')
                        L().log.info('number_of_signals: ' + str(number_of_signals) + '...')
                        if edges_per_object >= number_of_signals:
                            evaluated_numbers_of_signals.remove(number_of_signals)
                            continue

                        # NODE SETTINGS
                        sg.set_node_range(min_objects=number_of_signals,
                                          max_objects=number_of_signals,
                                          min_temp_nodes=number_of_temp_nodes,
                                          max_temp_nodes=number_of_temp_nodes,
                                          min_states=3, max_states=3)
                        eval_results.update({number_of_signals: dict()})

                        for iteration in range(iterations):
                            print('iteration: ' + str(iteration) + '...')
                            L().log.info('iteration: ' + str(iteration) + '...')

                            # SAMPLE DATA
                            models, specifications = sg.run_next_testcase()
                            in_seq = models[sg.reference_model.__name__].randomsample(1000, {})
                            sequences = sequences_to_intervals(
                                in_seq, models[sg.reference_model.__name__].Vdata, False)[0]

                            # additional information for evaluation
                            additional_infos = dict()
                            additional_infos[sg.reference_model.__name__] = {'execution_time': 0.0,
                                                                             'data': None}

                            for score in ['BIC', 'AIC', 'Bdeu', 'K2']:
                                print('score: ' + str(score) + '...')
                                L().log.info('score: ' + str(score) + '...')
                                for temporal_threshold in np.arange(0.0, 2.5, 0.5):
                                    print('temporal_threshold: ' + str(temporal_threshold) + '...')
                                    L().log.info('temporal_threshold: ' + str(temporal_threshold) + '...')

                                    # STRUCTURE DISCOVERER SETUP
                                    sd.score = score
                                    sd.max_time_difference = temporal_threshold
                                    sd_name = 'SBTreeDiscoverer_' + score + '_TH_' + str(temporal_threshold)
                                    if sd_name not in eval_results.get(number_of_signals):
                                        # initialise metrics_dict
                                        metrics_dict = dict((metric, []) for metric in metrics)
                                        eval_results.get(number_of_signals).update({sd_name: metrics_dict})
                                    discovery_algorithms.add(sd_name)
                                    model_name = sd_name + ' (' + str(iteration) + ')'

                                    # RUN ALGORITHM
                                    L().log.info('----------------------------------------------------------')
                                    print('Run approach ' + model_name + '.')
                                    L().log.info('Run approach ' + model_name + '.')
                                    ping = perf_counter()
                                    nodes, edges = sd.discover_structure(sequences)
                                    L().log.info('Nodes: ' + str(nodes))
                                    L().log.info('Edges: ' + str(edges))
                                    execution_time = perf_counter() - ping
                                    additional_infos[model_name] = {
                                        'execution_time': execution_time, 'data': sd.data,
                                        'psi_execution_time': sd.parent_set_identification_time,
                                        'so_execution_time': sd.structure_optimization_time}
                                    L().log.info('Execution time: ' + str(execution_time))
                                    L().log.info('----------------------------------------------------------')

                                    # CREATE TSCBN
                                    skel = GraphSkeleton()
                                    skel.V = nodes
                                    skel.E = edges
                                    skel.toporder()
                                    model = TSCBN("", skel, models[sg.reference_model.__name__].Vdata,
                                                  unempty=True, forbid_never=True, discrete_only=True)

                                    # EVALUATION
                                    eval_result = ev.evaluate(model_dict={model_name: model},
                                                              reference=models[sg.reference_model.__name__],
                                                              additional_infos=additional_infos)
                                    ev.print_eval_results(eval_results=eval_result,
                                                          specs=specifications, to_csv=True)
                                    for metric, value in eval_result[model_name].items():
                                        eval_results[number_of_signals][sd_name][metric].append(value)

                    experiment_name = 'ParameterSignalsExperiment_EPO_' + str(edges_per_object) + \
                                      '_TmpNodes_' + str(number_of_temp_nodes)
                    relevant_metrics = ["num-add-edges", "num-del-edges", "shd",
                                        "num-add-edges-skel", "num-del-edges-skel", "shd-skel",
                                        "kld", "execution-time", "psi-execution-time",
                                        "so-execution-time"]
                    write_pgfplots_data(experiment_name, eval_results, relevant_metrics,
                                        discovery_algorithms, evaluated_numbers_of_signals,
                                        'num_signals', target_path)

    # ----------------------------------------------------------------------------------------
    # Experiments with all algorithms
    # ----------------------------------------------------------------------------------------
    # 1st experiment: increase number of temporal nodes
    if comparison_experiment_temp_nodes:
        # EDGE SETTINGS
        sg.set_connection_ranges(min_edges_per_object=2, max_edges_per_object=2,
                                 min_percent_inter=1.0, max_percent_inter=1.0)

        # EVALUATOR SETUP
        ev = StructureEvaluator(True)
        ev.set_output_path(os.path.join(target_path, r"structure_eval_%s.csv"
                                        % strftime("%Y_%m_%d-%H_%M_%S", localtime())))
        metrics = ["add-edges", "del-edges", "num-add-edges", "num-del-edges", "shd",
                   "add-edges-skel", "del-edges-skel", "num-add-edges-skel",
                   "num-del-edges-skel", "shd-skel", "kld", "execution-time"]
        for metric in metrics:
            ev.add_metric(metric)

        eval_results = dict()
        for number_of_signals in [3, 4]:
            print('number_of_signals: ' + str(number_of_signals) + '...')
            L().log.info('number_of_signals: ' + str(number_of_signals) + '...')
            discovery_algorithms = set()
            numbers_of_temp_nodes = [2, 3, 4, 5, 6, 7, 8]
            for number_of_temp_nodes in numbers_of_temp_nodes:
                print('number_of_temp_nodes: ' + str(number_of_temp_nodes) + '...')
                L().log.info('number_of_temp_nodes: ' + str(number_of_temp_nodes) + '...')

                # NODE SETTINGS
                sg.set_node_range(min_objects=number_of_signals, max_objects=number_of_signals,
                                  min_temp_nodes=number_of_temp_nodes,
                                  max_temp_nodes=number_of_temp_nodes,
                                  min_states=3, max_states=3)
                eval_results.update({number_of_temp_nodes: dict()})
                metrics_dict = dict((metric, []) for metric in metrics)

                # ---------------------------------------------------
                # RUN Structure Discovery several times
                # ---------------------------------------------------
                for iteration in range(iterations):
                    print('iteration: ' + str(iteration) + '...')
                    L().log.info('iteration: ' + str(iteration) + '...')

                    # SAMPLE DATA
                    models, specifications = sg.run_next_testcase()
                    in_seq = models[sg.reference_model.__name__].randomsample(sample_size, {})
                    sequences = sequences_to_intervals(
                        in_seq, models[sg.reference_model.__name__].Vdata, False)[0]
                    additional_infos = dict()
                    additional_infos[sg.reference_model.__name__] = {'execution_time': 0.0,
                                                                     'data': None}

                    # ---------------------------------------------------
                    # Discovery Algorithm
                    # ---------------------------------------------------
                    for sd_name, sd in get_structure_discovery_algorithms():
                        # LIMITATIONS DUE TO RUNTIME PROBLEMS
                        # TODO: run all algorithms for all networks on better hardware
                        if sd_name.startswith('Astar') and number_of_signals * number_of_temp_nodes > 16:
                            print('Network too large for the A* algorithm.')
                            continue
                        if sd_name.startswith('PC') and number_of_signals * number_of_temp_nodes > 24:
                            print('Network too large for the PC algorithm.')
                            continue
                        discovery_algorithms.add(sd_name)
                        if sd_name not in eval_results.get(number_of_temp_nodes):
                            eval_results.get(number_of_temp_nodes).update({sd_name: copy.deepcopy(metrics_dict)})
                        model_name = sd_name + ' (' + str(iteration) + ')'

                        L().log.info('----------------------------------------------------------')
                        print('Run approach ' + model_name + '.')
                        L().log.info('Run approach ' + model_name + '.')
                        ping = perf_counter()
                        nodes, edges = sd.discover_structure(sequences)
                        L().log.info('Nodes: ' + str(nodes))
                        L().log.info('Edges: ' + str(edges))
                        execution_time = perf_counter() - ping
                        additional_infos[model_name] = {'execution_time': execution_time,
                                                        'data': sd.data}
                        L().log.info('Execution time: ' + str(execution_time))
                        L().log.info('----------------------------------------------------------')

                        # create TSCBN
                        skel = GraphSkeleton()
                        skel.V = nodes
                        skel.E = edges
                        skel.toporder()
                        model = TSCBN("", skel, models[sg.reference_model.__name__].Vdata,
                                      unempty=True, forbid_never=True, discrete_only=True)

                        # ----------------------------------------------------------------------------------------
                        # EVALUATION
                        # ----------------------------------------------------------------------------------------
                        eval_result = ev.evaluate(model_dict={model_name: model},
                                                  reference=models[sg.reference_model.__name__],
                                                  additional_infos=additional_infos)
                        ev.print_eval_results(eval_results=eval_result, specs=specifications,
                                              to_csv=True)
                        for metric, value in eval_result[model_name].items():
                            eval_results[number_of_temp_nodes][sd_name][metric].append(value)

            experiment_name = 'TempNodesExperiment_Sig_' + str(number_of_signals)
            relevant_metrics = ["num-add-edges", "num-del-edges", "shd", "num-add-edges-skel",
                                "num-del-edges-skel", "shd-skel", "kld", "execution-time"]
            write_pgfplots_data(experiment_name, eval_results, relevant_metrics,
                                discovery_algorithms, numbers_of_temp_nodes,
                                'number_of_temp_nodes', target_path)

    # 2nd experiment: increase number of signals
    if comparison_experiment_signals:
        # EDGE SETTINGS
        sg.set_connection_ranges(min_edges_per_object=2, max_edges_per_object=2,
                                 min_percent_inter=1.0, max_percent_inter=1.0)

        # EVALUATOR SETUP
        ev = StructureEvaluator(True)
        ev.set_output_path(os.path.join(target_path, r"structure_eval_%s.csv"
                                        % strftime("%Y_%m_%d-%H_%M_%S", localtime())))
        metrics = ["add-edges", "del-edges", "num-add-edges", "num-del-edges", "shd",
                   "add-edges-skel", "del-edges-skel", "num-add-edges-skel",
                   "num-del-edges-skel", "shd-skel", "kld", "execution-time",
                   "psi-execution-time", "so-execution-time"]
        for metric in metrics:
            ev.add_metric(metric)

        eval_results = dict()
        for number_of_temp_nodes in [3]:  # TODO: run with larger numbers on better hardware
            print('number_of_temp_nodes: ' + str(number_of_temp_nodes) + '...')
            L().log.info('number_of_temp_nodes: ' + str(number_of_temp_nodes) + '...')
            discovery_algorithms = set()
            numbers_of_signals = [3, 4, 5, 6, 7, 8]
            for number_of_signals in numbers_of_signals:
                print('number_of_signals: ' + str(number_of_signals) + '...')
                L().log.info('number_of_signals: ' + str(number_of_signals) + '...')

                # NODE SETTINGS
                sg.set_node_range(min_objects=number_of_signals, max_objects=number_of_signals,
                                  min_temp_nodes=number_of_temp_nodes,
                                  max_temp_nodes=number_of_temp_nodes,
                                  min_states=3, max_states=3)
                eval_results.update({number_of_signals: dict()})
                metrics_dict = dict((metric, []) for metric in metrics)

                # ---------------------------------------------------
                # RUN Structure Discovery several times
                # ---------------------------------------------------
                for iteration in range(iterations):
                    print('iteration: ' + str(iteration) + '...')
                    L().log.info('iteration: ' + str(iteration) + '...')

                    # SAMPLE DATA
                    models, specifications = sg.run_next_testcase()
                    in_seq = models[sg.reference_model.__name__].randomsample(sample_size, {})
                    sequences = sequences_to_intervals(
                        in_seq, models[sg.reference_model.__name__].Vdata, False)[0]
                    additional_infos = dict()
                    additional_infos[sg.reference_model.__name__] = {'execution_time': 0.0,
                                                                     'data': None,
                                                                     'psi-execution-time': 0.0,
                                                                     'so-execution-time': 0.0}

                    # ---------------------------------------------------
                    # Discovery Algorithm
                    # ---------------------------------------------------
                    for sd_name, sd in get_structure_discovery_algorithms():
                        # LIMITATIONS DUE TO RUNTIME PROBLEMS
                        # TODO: run all algorithms for all networks on better hardware
                        if sd_name.startswith('Astar') and number_of_signals * number_of_temp_nodes > 16:
                            print('Network too large for the A* algorithm.')
                            continue
                        if sd_name.startswith('PC') and number_of_signals * number_of_temp_nodes > 24:
                            print('Network too large for the PC algorithm.')
                            continue
                        if sd_name.startswith('sbPTM') and number_of_signals * number_of_temp_nodes > 30:
                            print('Network too large for the PTM algorithm.')
                            continue
                        if sd_name.startswith('cbPTM') and number_of_signals * number_of_temp_nodes > 30:
                            print('Network too large for the PTM algorithm.')
                            continue
                        discovery_algorithms.add(sd_name)
                        if sd_name not in eval_results.get(number_of_signals):
                            eval_results.get(number_of_signals).update({sd_name: copy.deepcopy(metrics_dict)})
                        model_name = sd_name + ' (' + str(iteration) + ')'

                        L().log.info('----------------------------------------------------------')
                        print('Run approach ' + model_name + '.')
                        L().log.info('Run approach ' + model_name + '.')
                        ping = perf_counter()
                        nodes, edges = sd.discover_structure(sequences)
                        L().log.info('Nodes: ' + str(nodes))
                        L().log.info('Edges: ' + str(edges))
                        execution_time = perf_counter() - ping
                        additional_infos[model_name] = {'execution_time': execution_time,
                                                        'data': sd.data,
                                                        'psi_execution_time': 0.0,
                                                        'so_execution_time': 0.0}
                        if sd.parent_set_identification_time and sd.structure_optimization_time:
                            additional_infos[model_name].update(
                                {'psi_execution_time': sd.parent_set_identification_time,
                                 'so_execution_time': sd.structure_optimization_time})
                        L().log.info('Execution time: ' + str(execution_time))
                        L().log.info('----------------------------------------------------------')

                        # create TSCBN
                        skel = GraphSkeleton()
                        skel.V = nodes
                        skel.E = edges
                        skel.toporder()
                        model = TSCBN("", skel, models[sg.reference_model.__name__].Vdata,
                                      unempty=True, forbid_never=True, discrete_only=True)

                        # ----------------------------------------------------------------------------------------
                        # EVALUATION
                        # ----------------------------------------------------------------------------------------
                        eval_result = ev.evaluate(model_dict={model_name: model},
                                                  reference=models[sg.reference_model.__name__],
                                                  additional_infos=additional_infos)
                        ev.print_eval_results(eval_results=eval_result, specs=specifications,
                                              to_csv=True)
                        for metric, value in eval_result[model_name].items():
                            eval_results[number_of_signals][sd_name][metric].append(value)

            experiment_name = 'SignalExperiment_TmpNodes_' + str(number_of_temp_nodes)
            relevant_metrics = ["num-add-edges", "num-del-edges", "shd", "num-add-edges-skel",
                                "num-del-edges-skel", "shd-skel", "kld", "execution-time",
                                "psi-execution-time", "so-execution-time"]
            write_pgfplots_data(experiment_name, eval_results, relevant_metrics,
                                discovery_algorithms, numbers_of_signals,
                                'number_of_signals', target_path)

    # 3rd experiment: different values for the state change probability
    if comparison_experiment_scp:
        # EDGE SETTINGS
        sg.set_connection_ranges(min_edges_per_object=2, max_edges_per_object=2,
                                 min_percent_inter=1.0, max_percent_inter=1.0)

        # EVALUATOR SETUP
        ev = StructureEvaluator(True)
        ev.set_output_path(os.path.join(target_path, r"structure_eval_%s.csv"
                                        % strftime("%Y_%m_%d-%H_%M_%S", localtime())))
        metrics = ["add-edges", "del-edges", "num-add-edges", "num-del-edges", "shd",
                   "add-edges-skel", "del-edges-skel", "num-add-edges-skel",
                   "num-del-edges-skel", "shd-skel", "kld", "execution-time"]
        for metric in metrics:
            ev.add_metric(metric)

        eval_results = dict()
        for number_of_temp_nodes in [3, 4]:
            print('number_of_temp_nodes: ' + str(number_of_temp_nodes) + '...')
            L().log.info('number_of_temp_nodes: ' + str(number_of_temp_nodes) + '...')

            # NODE SETTINGS
            sg.set_node_range(min_objects=3, max_objects=3,
                              min_temp_nodes=number_of_temp_nodes,
                              max_temp_nodes=number_of_temp_nodes,
                              min_states=2, max_states=4)
            # (overrides the edge settings above)
            sg.set_connection_ranges(min_edges_per_object=2, max_edges_per_object=3,
                                     min_percent_inter=0.5, max_percent_inter=1.0)
            discovery_algorithms = set()
            state_change_probabilities = [0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
            for state_change_probability in state_change_probabilities:
                print('state_change_probability: ' + str(state_change_probability) + '...')
                L().log.info('state_change_probability: ' + str(state_change_probability) + '...')
                sg.set_state_change_probability(min_probability=state_change_probability,
                                                max_probability=state_change_probability)
                eval_results.update({state_change_probability: dict()})
                metrics_dict = dict((metric, []) for metric in metrics)

                # ---------------------------------------------------
                # RUN Structure Discovery several times
                # ---------------------------------------------------
                for iteration in range(iterations):
                    print('iteration: ' + str(iteration) + '...')
                    L().log.info('iteration: ' + str(iteration) + '...')

                    # SAMPLE DATA
                    models, specifications = sg.run_next_testcase()
                    in_seq = models[sg.reference_model.__name__].randomsample(sample_size, {})
                    sequences = sequences_to_intervals(
                        in_seq, models[sg.reference_model.__name__].Vdata, False)[0]
                    additional_infos = dict()
                    additional_infos[sg.reference_model.__name__] = {'execution_time': 0.0,
                                                                     'data': None}

                    # ---------------------------------------------------
                    # Discovery Algorithm
                    # ---------------------------------------------------
                    for sd_name, sd in get_structure_discovery_algorithms():
                        # LIMITATIONS DUE TO RUNTIME PROBLEMS
                        # TODO: run all algorithms for all networks on better hardware
                        if sd_name.startswith('Astar') and 3 * number_of_temp_nodes > 16:
                            print('Network too large for the A* algorithm.')
                            continue
                        discovery_algorithms.add(sd_name)
                        if sd_name not in eval_results.get(state_change_probability):
                            eval_results.get(state_change_probability).update({sd_name: copy.deepcopy(metrics_dict)})
                        model_name = sd_name + ' (' + str(iteration) + ')'

                        L().log.info('----------------------------------------------------------')
                        print('Run approach ' + model_name + '.')
                        L().log.info('Run approach ' + model_name + '.')
                        ping = perf_counter()
                        nodes, edges = sd.discover_structure(sequences)
                        L().log.info('Nodes: ' + str(nodes))
                        L().log.info('Edges: ' + str(edges))
                        execution_time = perf_counter() - ping
                        additional_infos[model_name] = {'execution_time': execution_time,
                                                        'data': sd.data}
                        L().log.info('Execution time: ' + str(execution_time))
                        L().log.info('----------------------------------------------------------')

                        # create TSCBN
                        skel = GraphSkeleton()
                        skel.V = nodes
                        skel.E = edges
                        skel.toporder()
                        model = TSCBN("", skel, models[sg.reference_model.__name__].Vdata,
                                      unempty=True, forbid_never=True, discrete_only=True)

                        # ----------------------------------------------------------------------------------------
                        # EVALUATION
                        # ----------------------------------------------------------------------------------------
                        eval_result = ev.evaluate(model_dict={model_name: model},
                                                  reference=models[sg.reference_model.__name__],
                                                  additional_infos=additional_infos)
                        ev.print_eval_results(eval_results=eval_result, specs=specifications,
                                              to_csv=True)
                        for metric, value in eval_result[model_name].items():
                            eval_results[state_change_probability][sd_name][metric].append(value)

            experiment_name = 'SCP_Experiment_Sig_3_TmpNodes_' + str(number_of_temp_nodes)
            relevant_metrics = ["num-add-edges", "num-del-edges", "shd", "num-add-edges-skel",
                                "num-del-edges-skel", "shd-skel", "kld", "execution-time"]
            write_pgfplots_data(experiment_name, eval_results, relevant_metrics,
                                discovery_algorithms, state_change_probabilities,
                                'state_change_probability', target_path)
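# A minimal usage sketch of the runner above; the output directory below is a
# hypothetical placeholder.
if __name__ == '__main__':
    run_structure_experiment(
        target_path='results/structure',        # hypothetical output directory
        parameter_temp_nodes_experiment=True,   # run only the first parameter study
    )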
def experiment_discovery(_run, approach, sample_size, iterations, min_per_object_gap,
                         max_per_object_gap, temporal_variance, dbn_tolerance, sc_probability,
                         edges_per_object, inter_edge_percent, number_of_signals,
                         number_of_temp_nodes, sb_min_out_degree, sb_k_infrequent, sb_score,
                         sb_max_time_difference, pc_min_out_degree, pc_k_infrequent, pc_alpha,
                         pc_max_time_difference, pcd_alpha, pcd_max_reach, astar_score,
                         ghc_score, ghc_tabu_length, novel_filtering, novel_k_infrequent,
                         novel_alpha, novel_draw_it, novel_max_reach, novel_min_out_degree,
                         pc_chi):
    # ----------------------------------------------------------------------------------------
    # Setup
    # ----------------------------------------------------------------------------------------
    if edges_per_object >= number_of_signals:
        return

    # Generator Setup
    sg = initialize_generator(min_per_object_gap, max_per_object_gap, temporal_variance,
                              dbn_tolerance, sc_probability, edges_per_object,
                              inter_edge_percent, number_of_signals, number_of_temp_nodes)

    # SD Approach
    sd = get_sd_approach(approach, sb_min_out_degree, sb_k_infrequent, sb_score,
                         sb_max_time_difference, pc_min_out_degree, pc_k_infrequent, pc_alpha,
                         pc_max_time_difference, pcd_alpha, pcd_max_reach, astar_score,
                         ghc_score, ghc_tabu_length, novel_filtering, novel_k_infrequent,
                         novel_alpha, novel_draw_it, novel_min_out_degree, novel_max_reach,
                         pc_chi)

    # Evaluation Metrics
    ev = initialize_evaluator()

    # ----------------------------------------------------------------------------------------
    # Run Experiment
    # ----------------------------------------------------------------------------------------
    eval_results = dict()
    for iteration in range(iterations):
        print('iteration: ' + str(iteration + 1) + '...')

        # SAMPLE DATA
        models, specifications = sg.run_next_testcase()
        print("NUMBER INTER EDGES: %s" % str(len(
            [e for e in models["TSCBNStructureModel"].E
             if e[0].split("_")[0] != e[1].split("_")[0] and not e[1].startswith("dL_")])))
        in_seq = models[sg.reference_model.__name__].randomsample(sample_size, {})
        sequences = sequences_to_intervals(in_seq, models[sg.reference_model.__name__].Vdata,
                                           False)[0]

        additional_infos = dict()
        additional_infos[sg.reference_model.__name__] = {'execution_time': 0.0, 'data': None}

        # LIMITATIONS DUE TO RUNTIME PROBLEMS
        if hw_limitation_reached(approach, number_of_signals, number_of_temp_nodes):
            continue

        # RUN DISCOVERY
        ping = perf_counter()  # time.clock() was removed in Python 3.8
        nodes, edges = sd.discover_structure(sequences)
        execution_time = perf_counter() - ping

        # CREATE TSCBN FROM THE DISCOVERED STRUCTURE (CPDs taken from the reference model)
        skel = GraphSkeleton()
        skel.V = nodes
        skel.E = edges
        skel.toporder()
        model = TSCBN("", skel, models[sg.reference_model.__name__].Vdata, unempty=True,
                      forbid_never=True, discrete_only=True)

        # ----------------------------------------------------------------------------------------
        # Run Evaluation of the current Iteration
        # ----------------------------------------------------------------------------------------
        model_name = str(approach) + ' (' + str(iteration) + ')'
        additional_infos[model_name] = {'execution_time': execution_time, 'data': sd.data}
        eval_result = ev.evaluate(model_dict={model_name: model},
                                  reference=models[sg.reference_model.__name__],
                                  additional_infos=additional_infos)
        # ev.print_eval_results(eval_results=eval_result, specs=specifications, to_csv=True)
        for metric, value in eval_result[model_name].items():
            if metric not in eval_results:
                eval_results[metric] = []
            eval_results[metric].append(value)
            try:
                float(value)
                _run.log_scalar(metric, value)  # log only numeric metrics to the run
            except (TypeError, ValueError):
                pass

    # ----------------------------------------------------------------------------------------
    # Run Evaluation averaged over all Iterations
    # ----------------------------------------------------------------------------------------
    for metric in eval_results:
        try:
            # was float(eval_results[metric]), which always raises on a list and
            # silently skipped logging the averages
            float(eval_results[metric][0])
            _run.log_scalar("avg_%s" % metric, np.mean(eval_results[metric]))
        except (TypeError, ValueError, IndexError):
            pass
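# The _run argument and the _run.log_scalar calls above follow Sacred's
# captured-function convention. A minimal wiring sketch, assuming Sacred is
# indeed the framework in use; the experiment name and config values shown
# here are hypothetical placeholders.
from sacred import Experiment

ex = Experiment('tscbn_structure_discovery')

@ex.config
def config():
    approach = 'sbTree'  # hypothetical key understood by get_sd_approach
    sample_size = 5000
    iterations = 25
    # ... the remaining experiment_discovery parameters would be listed here ...

@ex.automain
def main(_run, approach, sample_size, iterations):
    # Sacred injects _run and the config values by name; a real run would pass
    # the full parameter list through to experiment_discovery.
    pass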