Code example #1
    def _add_temporal_basics_dump(self, dL_mean, dL_var, node_cpds, v, e):
        ''' Basic part that is the same for all cases.

        Note to myself: per discrete parent combination I have an own mean and an own variance;
                        per continuous parent I have no distributions, because those combine directly.
        Type definition:  http://pythonhosted.org/libpgm/CPDtypes.html
            If I am continuous:
                - if I have only continuous parents, or no parents at all - then type: lg
                - if I have discrete parents, or discrete and continuous ones - type: lgandd

            If I am discrete:
                - if I have no parents or only discrete parents - type is: discrete
                - if I have continuous parents - type is: "to be implemented" - do it yourself
        '''

        # Precondition: the initial node of each temporal variable must be at t=0.

        # 1. tp and dL information must be given - it is distributed in any case;
        #    currently the same for all nodes - in reality it is distinct per node.
        for vert in v:
            node_cpds[vert]["dL_mean"] = dL_mean
            node_cpds[vert]["dL_var"] = dL_var

        # Generate Network from this information
        skel = GraphSkeleton()
        skel.V = v
        skel.E = e
        try:
            skel.toporder()
        except Exception:
            print("Warning -> graph has cycles -> may be irrelevant depending on the use case")

        return skel, node_cpds
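For context, a minimal self-contained sketch (hypothetical vertex names, assuming libpgm is installed) of the node_cpds layout this helper produces: every vertex gets its own dL_mean/dL_var entry before the skeleton is topologically ordered.

from libpgm.graphskeleton import GraphSkeleton

# Hypothetical two-node chain for one temporal variable.
v = ["V0_0", "V0_1"]
e = [["V0_0", "V0_1"]]
dL_mean, dL_var = 0.5, 0.1

# Same gap statistics for every node, as in the helper above.
node_cpds = {vert: {"dL_mean": dL_mean, "dL_var": dL_var} for vert in v}

skel = GraphSkeleton()
skel.V = v
skel.E = e
skel.toporder()  # sorts V topologically; complains if the graph is cyclic

print(node_cpds["V0_1"])  # {'dL_mean': 0.5, 'dL_var': 0.1}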
Code example #2
    def generate_model(self, structure_specification):

        # 1. create defined number of nodes per object
        v, node_cpds, temp_dict = self._create_nodes(structure_specification)

        # 2. create edges incl. time between vertices
        e, temp_gap_dict = self._create_edges(v, structure_specification, temp_dict)

        # 3. add temporal information
        inverted_temp_dict = self._invert_dict(temp_dict)
        self._temporal_information(v, temp_gap_dict, node_cpds, self._parents_dict_from_edges(e), temp_dict,
                                   inverted_temp_dict)  # node_cpds passed by reference and contain temporal information

        # 4. Skeleton
        skel = GraphSkeleton()
        skel.V = v
        skel.E = e
        skel.toporder()

        # 5. Create Model
        tbn = TSCBN("", skel, node_cpds, unempty=True, forbid_never=True,
                    discrete_only=True)  # Discrete case - later continuous nodes

        # 6. Set cpds of value nodes
        self._set_cpds(tbn, structure_specification)

        return tbn
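The helper _parents_dict_from_edges used in step 3 is not shown in this snippet; a plausible stand-in, assuming it simply maps each vertex to the list of its parents, could look like this:

def parents_dict_from_edges(edges):
    # Hypothetical reconstruction: collect, per child vertex, the parent
    # vertices from the directed edge list.
    parents = {}
    for src, dst in edges:
        parents.setdefault(dst, []).append(src)
    return parents

# parents_dict_from_edges([("A_0", "A_1"), ("B_0", "A_1")])
# -> {'A_1': ['A_0', 'B_0']}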
Code example #3
File: ind_model.py  Project: arturmrowca/tscbn
    def _add_temporal_basics(self, dL_mean, dL_var, node_cpds, v, e, defaults):
        # 1. tp and dL information must be given - it is distributed in any case;
        #    currently the same for all nodes - in reality it is distinct per node.
        for vert in v:
            if vert in defaults:
                node_cpds[vert]["dL_mean"] = 0
                node_cpds[vert]["dL_var"] = 0
            else:
                node_cpds[vert]["dL_mean"] = dL_mean
                node_cpds[vert]["dL_var"] = dL_var

        # Generate Network from this information
        skel = GraphSkeleton()
        skel.V = v
        skel.E = e
        skel.toporder()
        return skel, node_cpds
Code example #4
    def _add_temporal_basics(self, dL_mean, dL_var, node_cpds, v, e, defaults):

        for vert in v:
            if vert in defaults:
                node_cpds[vert]["dL_mean"] = 0
                node_cpds[vert]["dL_var"] = 0
            else:
                node_cpds[vert]["dL_mean"] = dL_mean
                node_cpds[vert]["dL_var"] = dL_var

        # Generate Network from this information
        skel = GraphSkeleton()
        skel.V = v
        skel.E = e
        skel.toporder()

        return skel, node_cpds
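A worked toy example (vertex names hypothetical) of the defaults handling above: initial nodes pinned to t=0 get a deterministic zero gap, all other nodes share the given gap statistics.

defaults = {"V0_0": None}   # initial node of the object, fixed at t=0
dL_mean, dL_var = 0.5, 0.1
v = ["V0_0", "V0_1"]

node_cpds = {vert: {} for vert in v}
for vert in v:
    if vert in defaults:
        node_cpds[vert].update({"dL_mean": 0, "dL_var": 0})
    else:
        node_cpds[vert].update({"dL_mean": dL_mean, "dL_var": dL_var})

assert node_cpds["V0_0"] == {"dL_mean": 0, "dL_var": 0}
assert node_cpds["V0_1"] == {"dL_mean": 0.5, "dL_var": 0.1}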
Code example #5
    def generate_model(self, structure_specification):
        ''' Dynamic Bayesian network '''

        # 1. define static structure
        ndata, edges, first_time, last_time, resolution = self._create_nodes_edges(structure_specification)

        # 2. learn parameters from it
        edges = set([tuple([q.split("_")[0] for q in e]) for e in edges])
        nodes = set([q.split("_")[0] for q in ndata])
        ndata_reduced = dict([(k, ndata[k+"_0"]) for k in nodes])
        node_data, intensity_params, transition_params, _, _ = self.get_cpd_raw(nodes, edges, ndata_reduced)

        # 3. learn transition matrices given the condition:
        #    per incoming edge I have a matrix with my values,
        #    e.g. [0.2, 0.5, 0.3] | ParA=0, ParB=1 -
        #    so per parent value combination, create a matrix
        #    with my transition values as in a BN

        # 4. learn intensity matrix:
        #    per parent value combination one matrix X x X,
        #    where X is the number of values this variable
        #    can take

        # i.e. model = nodes, edges + transition matrices + intensity matrices

        self.eval_cpd_entries = transition_params + intensity_params

        # 5. assemble skeleton and node data from it
        nd = NodeData()
        nd.Vdata = ndata
        skel = GraphSkeleton()
        skel.E = edges
        skel.V = nodes
        skel.alldata = dict()
        skel.alldata["eval_cpd_entries_count"] = self.eval_cpd_entries

        skel.alldata["ndata"] = ndata_reduced# states = transition_matrix (= dim_x + dim_y) + intensity
        return skel
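The comments in steps 3 and 4 describe one transition matrix and one X x X intensity matrix per parent-value combination. A purely illustrative numpy sketch of those shapes (all values made up):

import numpy as np

X = 3  # number of values this variable can take

# Per parent-value combination: a row-stochastic transition matrix over the
# X states, e.g. conditioned on ParA=0, ParB=1 as in the comment above.
transition_mats = {
    ("ParA=0", "ParB=1"): np.array([[0.2, 0.5, 0.3],
                                    [0.1, 0.8, 0.1],
                                    [0.3, 0.3, 0.4]]),
}

# Per parent-value combination: an X x X intensity matrix; zeros are placeholders.
intensity_mats = {
    ("ParA=0", "ParB=1"): np.zeros((X, X)),
}

for m in transition_mats.values():
    assert np.allclose(m.sum(axis=1), 1.0)  # each row is a distribution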
Code example #6
    def generate_model(self, structure_specification):
        ''' Dynamic Bayesian network '''

        # 1. define static structure
        ndata, edges, first_time, last_time, resolution = self._create_nodes_edges(
            structure_specification)

        # 2. learn parameters from it
        nd = NodeData()
        nd.Vdata = ndata
        skel = GraphSkeleton()
        skel.E = edges
        skel.V = list(ndata.keys())
        skel.toporder()
        bn = DBNDiscrete(skel, nd)
        bn.start_time = 0.0  # first_time
        bn.end_time = last_time
        bn.resolution = resolution

        if self.EXPLICIT_DISABLING:
            bn.eval_cpd_entries = self.eval_cpd_entries

        return bn
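A small arithmetic sketch of the time grid implied by start_time, end_time and resolution above (assuming an inclusive grid, which this snippet does not confirm):

start_time, end_time, resolution = 0.0, 10.0, 0.5  # illustrative values

# Number of time slices the discrete DBN would unroll to on an inclusive grid.
num_slices = int((end_time - start_time) / resolution) + 1
print(num_slices)  # 21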
Code example #7
    def _estimate_tscbn(self, sequences, debug):
        repeat_frequency = self.sampling_frequency

        # 1. per sequence draw 100 (?) valid combinations
        per_seq_trees, per_seq_initial_states = self._extract_sample_trees(sequences)

        # clean skeleton: drop the dL_ helper nodes and their edges
        new_E = []
        for ed in self.tbn.skeleton.E:
            if not str.startswith(ed[0], "dL_") and not str.startswith(ed[1], "dL_"):
                new_E.append(ed)
        new_V = []
        for ver in self.tbn.skeleton.V:
            if not str.startswith(ver, "dL_"):
                new_V.append(ver)
        skel = GraphSkeleton()
        skel.E = new_E
        skel.V = new_V

        #print("todo: DO THIS ONLY IF SEQUENCE IS NOT COMPLETE")
        ping = time.clock()
        delta_t_distribution = {}
        for i in range(repeat_frequency):
            if i % 50 == 0:
                print(str(i) + "/" + str(self.sampling_frequency))

            # this can be made parallel
            for seq_count in range(len(sequences)):
                try:
                    trees = per_seq_trees[seq_count]
                except (KeyError, IndexError):
                    continue  # fine - this sequence is probably invalid
                initial_states = per_seq_initial_states[seq_count]

                # draw valid sample - output = V0_0 = ... dL_...
                seq, delta_t_distribution = self.draw_valid_sample(initial_states, trees, delta_t_distribution)

        print("Estimation %s" % str(time.clock() - ping))

        # normalize
        for v in self.tbn.V:
            if str.startswith(v, "dL_"): continue
            if isinstance(self.tbn.Vdata[v]["cprob"], dict):
                for k in self.tbn.Vdata[v]["cprob"]:
                    self.tbn.Vdata[v]["cprob"][k] /= np.sum(self.tbn.Vdata[v]["cprob"][k])
            else:
                s = np.sum(self.tbn.Vdata[v]["cprob"])
                if s == 0:  # uniform if never observed
                    n = len(self.tbn.Vdata[v]["cprob"])
                    self.tbn.Vdata[v]["cprob"] = np.ones(n) / float(n)
                else:
                    self.tbn.Vdata[v]["cprob"] /= s

        # compute time:
        # do a norm fit on the last run, then simply aggregate all Gaussians -
        # WAIT - this must happen conditioned on the parents
        for k in delta_t_distribution:
            for j in delta_t_distribution[k]:
                mean, std = norm.fit(delta_t_distribution[k][j])
                var = std * std
                if var == 0:
                    var = 0.02  # otherwise everything except the exact value has zero density
                mean_scale = [1] * len(self.tbn.Vdata[k]["parents"])
                self.tbn.Vdata[k]["hybcprob"][j] = {'variance': var, 'mean_base': mean, 'mean_scal': mean_scale}

        return self.tbn
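The final loop fits one Gaussian per (dL node, parent combination) key. A minimal standalone sketch of that step with hypothetical keys and samples (scipy's norm.fit returns the maximum-likelihood mean and standard deviation):

from scipy.stats import norm

# Hypothetical gap samples collected for node "dL_V0_1" under one parent combo.
delta_t_distribution = {"dL_V0_1": {"['on']": [0.42, 0.55, 0.47, 0.51]}}

for k in delta_t_distribution:
    for j, samples in delta_t_distribution[k].items():
        mean, std = norm.fit(samples)  # maximum-likelihood estimates
        var = std * std
        if var == 0:
            var = 0.02  # avoid a degenerate point mass, as above
        print(k, j, {"variance": var, "mean_base": mean, "mean_scal": []})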
Code example #8
def run_structure_experiment(target_path, parameter_temp_nodes_experiment=False, parameter_signals_experiment=False,
                          comparison_experiment_temp_nodes=False, comparison_experiment_signals=False,
                          comparison_experiment_scp=False):
    # number of iterations per experiment
    iterations = 25
    # number of sequences per experiment
    sample_size = 5000

    # ----------------------------------------------------------------------------------------
    #      Structure Generator Setup
    # ----------------------------------------------------------------------------------------
    sg = StructureGenerator(test_type=TestStructureEnum.SPECIFICATION)
    sg.add_base_structure_models([TSCBNStructureModel])
    sg.reference_model = TSCBNStructureModel

    # TIME SETTINGS (fixed for all experiments)
    sg.set_temporal_range(min_per_object_gap=0.5, max_per_object_gap=1.0)
    sg.set_temporal_variance(0.001)
    sg.set_dbn_tolerance(0.1)

    # PROBABILITY SETTINGS (fixed for all experiments)
    sg.set_state_change_probability(min_probability=0.95, max_probability=0.95)

    # ----------------------------------------------------------------------------------------
    #      Experiment with different parameters of the SBTreeDiscoverer
    # ----------------------------------------------------------------------------------------
    if parameter_temp_nodes_experiment or parameter_signals_experiment:
        sd = SBTreeDiscoverer(min_out_degree=0.1, k_infrequent=0.1, approach='parent_graph', parallel=False)
        # filtering parameters fixed at 0.1
        # parent graph approach means exact score optimization (but not exhaustive)
        # structure optimization not iteration in parallel

        for edges_per_object in [1, 3]:
            print('edges_per_object: ' + str(edges_per_object) + '...')
            L().log.info('edges_per_object: ' + str(edges_per_object) + '...')

            # EDGE SETTINGS
            sg.set_connection_ranges(min_edges_per_object=edges_per_object, max_edges_per_object=edges_per_object,
                                     min_percent_inter=1.0, max_percent_inter=1.0)

            if parameter_temp_nodes_experiment:
                # 1st experiment: Increase number of temporal variables per signal

                # EVALUATOR SETUP
                ev = StructureEvaluator(True)
                ev.set_output_path(os.path.join(target_path, r"structure_eval_%s.csv" % strftime("%Y_%m_%d-%H_%M_%S", localtime())))
                metrics = ["add-edges", "del-edges", "num-add-edges", "num-del-edges", "shd", "add-edges-skel",
                           "del-edges-skel", "num-add-edges-skel", "num-del-edges-skel", "shd-skel", "kld",
                           "execution-time", "psi-execution-time", "so-execution-time"]
                for metric in metrics:
                    ev.add_metric(metric)
                eval_results = dict()
                discovery_algorithms = set()

                for number_of_signals in [2, 3, 4]:
                    print('number_of_signals: ' + str(number_of_signals) + '...')
                    L().log.info('number_of_signals: ' + str(number_of_signals) + '...')

                    if edges_per_object >= number_of_signals:
                        continue

                    numbers_of_temp_nodes = [1, 2, 3, 4, 5, 6, 7]
                    for number_of_temp_nodes in numbers_of_temp_nodes:
                        print('number_of_temp_nodes: ' + str(number_of_temp_nodes) + '...')
                        L().log.info('number_of_temp_nodes: ' + str(number_of_temp_nodes) + '...')

                        # NODE SETTINGS
                        sg.set_node_range(min_objects=number_of_signals, max_objects=number_of_signals,
                                          min_temp_nodes=number_of_temp_nodes, max_temp_nodes=number_of_temp_nodes,
                                          min_states=3, max_states=3)

                        eval_results.update({number_of_temp_nodes: dict()})

                        for iteration in range(0, iterations):
                            print('iteration: ' + str(iteration) + '...')
                            L().log.info('iteration: ' + str(iteration) + '...')

                            # SAMPLE DATA
                            models, specifications = sg.run_next_testcase()
                            in_seq = models[sg.reference_model.__name__].randomsample(sample_size, {})
                            sequences = sequences_to_intervals(
                                in_seq, models[sg.reference_model.__name__].Vdata, False)[0]

                            # additional information for evaluation
                            additional_infos = dict()
                            additional_infos[sg.reference_model.__name__] = {'execution_time': 0.0, 'data': None}

                            for score in ['BIC', 'AIC', 'Bdeu', 'K2']:
                                print('score: ' + str(score) + '...')
                                L().log.info('score: ' + str(score) + '...')

                                for temporal_threshold in np.arange(0.0, 2.5, 0.5):
                                    print('temporal_threshold: ' + str(temporal_threshold) + '...')
                                    L().log.info('temporal_threshold: ' + str(temporal_threshold) + '...')

                                    # STRUCTURE DISCOVERER SETUP
                                    sd.score = score
                                    sd.max_time_difference = temporal_threshold

                                    sd_name = 'SBTreeDiscoverer_' + score + '_TH_' + str(temporal_threshold)
                                    if sd_name not in eval_results.get(number_of_temp_nodes):  # initialise metrics_dict
                                        metrics_dict = dict((metric, []) for metric in metrics)
                                        eval_results.get(number_of_temp_nodes).update({sd_name: metrics_dict})
                                        discovery_algorithms.add(sd_name)
                                    model_name = sd_name + ' (' + str(iteration) + ')'

                                    # RUN ALGORITHM
                                    L().log.info('----------------------------------------------------------')
                                    print('Run approach ' + model_name + '.')
                                    L().log.info('Run approach ' + model_name + '.')
                                    ping = clock()
                                    nodes, edges = sd.discover_structure(sequences)
                                    L().log.info('Nodes: ' + str(nodes))
                                    L().log.info('Edges: ' + str(edges))
                                    execution_time = clock() - ping
                                    additional_infos[model_name] = {'execution_time': execution_time, 'data': sd.data,
                                                                    'psi_execution_time': sd.parent_set_identification_time,
                                                                    'so_execution_time': sd.structure_optimization_time}
                                    L().log.info('Execution time: ' + str(execution_time))
                                    L().log.info('----------------------------------------------------------')

                                    # CREATE TSCBN
                                    skel = GraphSkeleton()
                                    skel.V = nodes
                                    skel.E = edges
                                    skel.toporder()
                                    model = TSCBN("", skel, models[sg.reference_model.__name__].Vdata, unempty=True,
                                                  forbid_never=True, discrete_only=True)

                                    # EVALUATION
                                    eval_result = ev.evaluate(model_dict={model_name: model},
                                                              reference=models[sg.reference_model.__name__],
                                                              additional_infos=additional_infos)
                                    ev.print_eval_results(eval_results=eval_result, specs=specifications, to_csv=True)
                                    for metric, value in eval_result[model_name].items():
                                        eval_results[number_of_temp_nodes][sd_name][metric].append(value)
                    experiment_name = 'ParameterTmpNodesExperiment_EPO_' + str(edges_per_object) + '_Sig_' + \
                                      str(number_of_signals)
                    relevant_metrics = ["num-add-edges", "num-del-edges", "shd", "num-add-edges-skel",
                                        "num-del-edges-skel", "shd-skel", "kld", "execution-time", "psi-execution-time",
                                        "so-execution-time"]
                    write_pgfplots_data(experiment_name, eval_results, relevant_metrics, discovery_algorithms,
                                        numbers_of_temp_nodes, 'number_of_temp_nodes', target_path)

            if parameter_signals_experiment:
                # 2nd experiment: Increase number of signals

                if edges_per_object == 3:
                    continue  # TODO: remove this once a maximum number of signals larger than 5 is chosen

                # EVALUATOR SETUP
                ev = StructureEvaluator(True)
                ev.set_output_path(os.path.join(target_path, r"structure_eval_%s.csv" % strftime("%Y_%m_%d-%H_%M_%S", localtime())))
                metrics = ["add-edges", "del-edges", "num-add-edges", "num-del-edges", "shd", "add-edges-skel",
                           "del-edges-skel", "num-add-edges-skel", "num-del-edges-skel", "shd-skel", "kld",
                           "execution-time", "psi-execution-time", "so-execution-time"]
                for metric in metrics:
                    ev.add_metric(metric)
                eval_results = dict()
                discovery_algorithms = set()

                for number_of_temp_nodes in [3, 5]:
                    print('number_of_temp_nodes: ' + str(number_of_temp_nodes) + '...')
                    L().log.info('number_of_temp_nodes: ' + str(number_of_temp_nodes) + '...')

                    numbers_of_signals = [2, 3, 4, 5]
                    evaluated_numbers_of_signals = copy.deepcopy(numbers_of_signals)
                    for number_of_signals in numbers_of_signals:
                        print('number_of_signals: ' + str(number_of_signals) + '...')
                        L().log.info('number_of_signals: ' + str(number_of_signals) + '...')

                        if edges_per_object >= number_of_signals:
                            evaluated_numbers_of_signals.remove(number_of_signals)
                            continue

                        # NODE SETTINGS
                        sg.set_node_range(min_objects=number_of_signals, max_objects=number_of_signals,
                                          min_temp_nodes=number_of_temp_nodes, max_temp_nodes=number_of_temp_nodes,
                                          min_states=3, max_states=3)

                        eval_results.update({number_of_signals: dict()})

                        for iteration in range(iterations):
                            print('iteration: ' + str(iteration) + '...')
                            L().log.info('iteration: ' + str(iteration) + '...')

                            # SAMPLE DATA
                            models, specifications = sg.run_next_testcase()
                            in_seq = models[sg.reference_model.__name__].randomsample(1000, {})
                            sequences = sequences_to_intervals(
                                in_seq, models[sg.reference_model.__name__].Vdata, False)[0]

                            # additional information for evaluation
                            additional_infos = dict()
                            additional_infos[sg.reference_model.__name__] = {'execution_time': 0.0, 'data': None}

                            for score in ['BIC', 'AIC', 'Bdeu', 'K2']:
                                print('score: ' + str(score) + '...')
                                L().log.info('score: ' + str(score) + '...')

                                for temporal_threshold in np.arange(0.0, 2.5, 0.5):
                                    print('temporal_threshold: ' + str(temporal_threshold) + '...')
                                    L().log.info('temporal_threshold: ' + str(temporal_threshold) + '...')

                                    # STRUCTURE DISCOVERER SETUP
                                    sd.score = score
                                    sd.max_time_difference = temporal_threshold

                                    sd_name = 'SBTreeDiscoverer_' + score + '_TH_' + str(temporal_threshold)
                                    if sd_name not in eval_results.get(number_of_signals):  # initialise metrics_dict
                                        metrics_dict = dict((metric, []) for metric in metrics)
                                        eval_results.get(number_of_signals).update({sd_name: metrics_dict})
                                        discovery_algorithms.add(sd_name)
                                    model_name = sd_name + ' (' + str(iteration) + ')'

                                    # RUN ALGORITHM
                                    L().log.info('----------------------------------------------------------')
                                    print('Run approach ' + model_name + '.')
                                    L().log.info('Run approach ' + model_name + '.')
                                    ping = clock()
                                    nodes, edges = sd.discover_structure(sequences)
                                    L().log.info('Nodes: ' + str(nodes))
                                    L().log.info('Edges: ' + str(edges))
                                    execution_time = clock() - ping
                                    additional_infos[model_name] = {'execution_time': execution_time, 'data': sd.data,
                                                                    'psi_execution_time': sd.parent_set_identification_time,
                                                                    'so_execution_time': sd.structure_optimization_time}
                                    L().log.info('Execution time: ' + str(execution_time))
                                    L().log.info('----------------------------------------------------------')

                                    # CREATE TSCBN
                                    skel = GraphSkeleton()
                                    skel.V = nodes
                                    skel.E = edges
                                    skel.toporder()
                                    model = TSCBN("", skel, models[sg.reference_model.__name__].Vdata, unempty=True,
                                                  forbid_never=True, discrete_only=True)

                                    # EVALUATION
                                    eval_result = ev.evaluate(model_dict={model_name: model},
                                                              reference=models[sg.reference_model.__name__],
                                                              additional_infos=additional_infos)
                                    ev.print_eval_results(eval_results=eval_result, specs=specifications, to_csv=True)
                                    for metric, value in eval_result[model_name].items():
                                        eval_results[number_of_signals][sd_name][metric].append(value)
                    experiment_name = 'ParameterSignalsExperiment_EPO_' + str(edges_per_object) + '_TmpNodes_' + \
                                      str(number_of_temp_nodes)
                    relevant_metrics = ["num-add-edges", "num-del-edges", "shd", "num-add-edges-skel",
                                        "num-del-edges-skel", "shd-skel", "kld", "execution-time", "psi-execution-time",
                                        "so-execution-time"]
                    write_pgfplots_data(experiment_name, eval_results, relevant_metrics, discovery_algorithms,
                                        evaluated_numbers_of_signals, 'num_signals', target_path)

    # ----------------------------------------------------------------------------------------
    #      Experiments with all algorithms
    # ----------------------------------------------------------------------------------------
    # 1st experiment: increase number of temporal nodes
    if comparison_experiment_temp_nodes:
        # EDGE SETTINGS
        sg.set_connection_ranges(min_edges_per_object=2, max_edges_per_object=2,
                                 min_percent_inter=1.0, max_percent_inter=1.0)

        # EVALUATOR SETUP
        ev = StructureEvaluator(True)
        ev.set_output_path(os.path.join(target_path, r"structure_eval_%s.csv" % strftime("%Y_%m_%d-%H_%M_%S", localtime())))
        metrics = ["add-edges", "del-edges", "num-add-edges", "num-del-edges", "shd", "add-edges-skel",
                   "del-edges-skel", "num-add-edges-skel", "num-del-edges-skel", "shd-skel", "kld", "execution-time"]
        for metric in metrics:
            ev.add_metric(metric)
        eval_results = dict()

        for number_of_signals in [3, 4]:
            print('number_of_signals: ' + str(number_of_signals) + '...')
            L().log.info('number_of_signals: ' + str(number_of_signals) + '...')

            discovery_algorithms = set()

            numbers_of_temp_nodes = [2, 3, 4, 5, 6, 7, 8]
            for number_of_temp_nodes in numbers_of_temp_nodes:
                print('number_of_temp_nodes: ' + str(number_of_temp_nodes) + '...')
                L().log.info('number_of_temp_nodes: ' + str(number_of_temp_nodes) + '...')

                # NODE SETTINGS
                sg.set_node_range(min_objects=number_of_signals, max_objects=number_of_signals,
                                  min_temp_nodes=number_of_temp_nodes, max_temp_nodes=number_of_temp_nodes,
                                  min_states=3, max_states=3)

                eval_results.update({number_of_temp_nodes: dict()})
                metrics_dict = dict((metric, []) for metric in metrics)

                # ---------------------------------------------------
                #   RUN Structure Discovery several times
                # ---------------------------------------------------
                for iteration in range(iterations):
                    print('iteration: ' + str(iteration) + '...')
                    L().log.info('iteration: ' + str(iteration) + '...')

                    # SAMPLE DATA
                    models, specifications = sg.run_next_testcase()
                    in_seq = models[sg.reference_model.__name__].randomsample(sample_size, {})
                    sequences = sequences_to_intervals(in_seq, models[sg.reference_model.__name__].Vdata, False)[0]

                    additional_infos = dict()
                    additional_infos[sg.reference_model.__name__] = {'execution_time': 0.0, 'data': None}

                    # ---------------------------------------------------
                    #   Discovery Algorithm
                    # ---------------------------------------------------
                    for sd_name, sd in get_structure_discovery_algorithms():

                        # LIMITATIONS DUE TO RUNTIME PROBLEMS
                        # TODO: run all algorithms for all networks on a better hardware
                        if str.startswith(sd_name, 'Astar') and number_of_signals * number_of_temp_nodes > 16:
                            print('Network too large for the A* algorithm.')
                            continue
                        if str.startswith(sd_name, 'PC') and number_of_signals * number_of_temp_nodes > 24:
                            print('Network too large for the PC algorithm.')
                            continue

                        discovery_algorithms.add(sd_name)
                        if sd_name not in eval_results.get(number_of_temp_nodes):
                            eval_results.get(number_of_temp_nodes).update({sd_name: copy.deepcopy(metrics_dict)})

                        model_name = sd_name + ' (' + str(iteration) + ')'
                        L().log.info('----------------------------------------------------------')
                        print('Run approach ' + model_name + '.')
                        L().log.info('Run approach ' + model_name + '.')

                        ping = clock()
                        nodes, edges = sd.discover_structure(sequences)
                        L().log.info('Nodes: ' + str(nodes))
                        L().log.info('Edges: ' + str(edges))
                        execution_time = clock() - ping
                        additional_infos[model_name] = {'execution_time': execution_time, 'data': sd.data}
                        L().log.info('Execution time: ' + str(execution_time))
                        L().log.info('----------------------------------------------------------')

                        # create TSCBN
                        skel = GraphSkeleton()
                        skel.V = nodes
                        skel.E = edges
                        skel.toporder()
                        model = TSCBN("", skel, models[sg.reference_model.__name__].Vdata, unempty=True,
                                      forbid_never=True, discrete_only=True)

                        # ----------------------------------------------------------------------------------------
                        #       EVALUATION
                        # ----------------------------------------------------------------------------------------
                        eval_result = ev.evaluate(model_dict={model_name: model},
                                                  reference=models[sg.reference_model.__name__],
                                                  additional_infos=additional_infos)
                        ev.print_eval_results(eval_results=eval_result, specs=specifications, to_csv=True)
                        for metric, value in eval_result[model_name].items():
                            eval_results[number_of_temp_nodes][sd_name][metric].append(value)
            experiment_name = 'TempNodesExperiment_Sig_' + str(number_of_signals)
            relevant_metrics = ["num-add-edges", "num-del-edges", "shd", "num-add-edges-skel", "num-del-edges-skel",
                                "shd-skel", "kld", "execution-time"]
            write_pgfplots_data(experiment_name, eval_results, relevant_metrics, discovery_algorithms,
                                numbers_of_temp_nodes, 'number_of_temp_nodes', target_path)

    # 2nd experiment: increase number of signals
    if comparison_experiment_signals:
        # EDGE SETTINGS
        sg.set_connection_ranges(min_edges_per_object=2, max_edges_per_object=2,
                                 min_percent_inter=1.0, max_percent_inter=1.0)

        # EVALUATOR SETUP
        ev = StructureEvaluator(True)
        ev.set_output_path(os.path.join(target_path, r"structure_eval_%s.csv" % strftime("%Y_%m_%d-%H_%M_%S", localtime())))
        metrics = ["add-edges", "del-edges", "num-add-edges", "num-del-edges", "shd", "add-edges-skel",
                   "del-edges-skel", "num-add-edges-skel", "num-del-edges-skel", "shd-skel", "kld", "execution-time",
                   "psi-execution-time", "so-execution-time"]
        for metric in metrics:
            ev.add_metric(metric)
        eval_results = dict()

        for number_of_temp_nodes in [3]:  # TODO: run with larger numbers on better hardware
            print('number_of_temp_nodes: ' + str(number_of_temp_nodes) + '...')
            L().log.info('number_of_temp_nodes: ' + str(number_of_temp_nodes) + '...')

            discovery_algorithms = set()

            numbers_of_signals = [3, 4, 5, 6, 7, 8]
            for number_of_signals in numbers_of_signals:
                print('number_of_signals: ' + str(number_of_signals) + '...')
                L().log.info('number_of_signals: ' + str(number_of_signals) + '...')

                # NODE SETTINGS
                sg.set_node_range(min_objects=number_of_signals, max_objects=number_of_signals,
                                  min_temp_nodes=number_of_temp_nodes, max_temp_nodes=number_of_temp_nodes,
                                  min_states=3, max_states=3)

                eval_results.update({number_of_signals: dict()})
                metrics_dict = dict((metric, []) for metric in metrics)

                # ---------------------------------------------------
                #   RUN Structure Discovery several times
                # ---------------------------------------------------
                for iteration in range(iterations):
                    print('iteration: ' + str(iteration) + '...')
                    L().log.info('iteration: ' + str(iteration) + '...')

                    # SAMPLE DATA
                    models, specifications = sg.run_next_testcase()
                    in_seq = models[sg.reference_model.__name__].randomsample(sample_size, {})
                    sequences = sequences_to_intervals(in_seq, models[sg.reference_model.__name__].Vdata, False)[0]

                    additional_infos = dict()
                    additional_infos[sg.reference_model.__name__] = {'execution_time': 0.0, 'data': None,
                                                                     'psi-execution-time': 0.0,
                                                                     'so-execution-time': 0.0}

                    # ---------------------------------------------------
                    #   Discovery Algorithm
                    # ---------------------------------------------------
                    for sd_name, sd in get_structure_discovery_algorithms():

                        # LIMITATIONS DUE TO RUNTIME PROBLEMS
                        # TODO: run all algorithms for all networks on a better hardware
                        if str.startswith(sd_name, 'Astar') and number_of_signals * number_of_temp_nodes > 16:
                            print('Network too large for the A* algorithm.')
                            continue
                        if str.startswith(sd_name, 'PC') and number_of_signals * number_of_temp_nodes > 24:
                            print('Network too large for the PC algorithm.')
                            continue
                        if str.startswith(sd_name, 'sbPTM') and number_of_signals * number_of_temp_nodes > 30:
                            print('Network too large for the PTM algorithm.')
                            continue
                        if str.startswith(sd_name, 'cbPTM') and number_of_signals * number_of_temp_nodes > 30:
                            print('Network too large for the PTM algorithm.')
                            continue

                        discovery_algorithms.add(sd_name)
                        if sd_name not in eval_results.get(number_of_signals):
                            eval_results.get(number_of_signals).update({sd_name: copy.deepcopy(metrics_dict)})

                        model_name = sd_name + ' (' + str(iteration) + ')'
                        L().log.info('----------------------------------------------------------')
                        print('Run approach ' + model_name + '.')
                        L().log.info('Run approach ' + model_name + '.')

                        ping = clock()
                        nodes, edges = sd.discover_structure(sequences)
                        L().log.info('Nodes: ' + str(nodes))
                        L().log.info('Edges: ' + str(edges))
                        execution_time = clock() - ping
                        additional_infos[model_name] = {'execution_time': execution_time, 'data': sd.data,
                                                        'psi_execution_time': 0.0, 'so_execution_time': 0.0}
                        if sd.parent_set_identification_time and sd.structure_optimization_time:
                            additional_infos[model_name].update(
                                {'psi_execution_time': sd.parent_set_identification_time,
                                 'so_execution_time': sd.structure_optimization_time})
                        L().log.info('Execution time: ' + str(execution_time))
                        L().log.info('----------------------------------------------------------')

                        # create TSCBN
                        skel = GraphSkeleton()
                        skel.V = nodes
                        skel.E = edges
                        skel.toporder()
                        model = TSCBN("", skel, models[sg.reference_model.__name__].Vdata, unempty=True,
                                      forbid_never=True, discrete_only=True)

                        # ----------------------------------------------------------------------------------------
                        #       EVALUATION
                        # ----------------------------------------------------------------------------------------
                        eval_result = ev.evaluate(model_dict={model_name: model},
                                                  reference=models[sg.reference_model.__name__],
                                                  additional_infos=additional_infos)
                        ev.print_eval_results(eval_results=eval_result, specs=specifications, to_csv=True)
                        for metric, value in eval_result[model_name].items():
                            eval_results[number_of_signals][sd_name][metric].append(value)
            experiment_name = 'SignalExperiment_TmpNodes_' + str(number_of_temp_nodes)
            relevant_metrics = ["num-add-edges", "num-del-edges", "shd", "num-add-edges-skel", "num-del-edges-skel",
                                "shd-skel", "kld", "execution-time", "psi-execution-time", "so-execution-time"]
            write_pgfplots_data(experiment_name, eval_results, relevant_metrics, discovery_algorithms,
                                numbers_of_signals, 'number_of_signals', target_path)

    # 3rd experiment: different values for the state change probability
    if comparison_experiment_scp:
        # EDGE SETTINGS
        sg.set_connection_ranges(min_edges_per_object=2, max_edges_per_object=2,
                                 min_percent_inter=1.0, max_percent_inter=1.0)

        # EVALUATOR SETUP
        ev = StructureEvaluator(True)
        ev.set_output_path(os.path.join(target_path, r"structure_eval_%s.csv" % strftime("%Y_%m_%d-%H_%M_%S", localtime())))
        metrics = ["add-edges", "del-edges", "num-add-edges", "num-del-edges", "shd", "add-edges-skel",
                   "del-edges-skel", "num-add-edges-skel", "num-del-edges-skel", "shd-skel", "kld",
                   "execution-time"]
        for metric in metrics:
            ev.add_metric(metric)
        eval_results = dict()

        for number_of_temp_nodes in [3, 4]:
            print('number_of_temp_nodes: ' + str(number_of_temp_nodes) + '...')
            L().log.info('number_of_temp_nodes: ' + str(number_of_temp_nodes) + '...')

            # NODE SETTINGS
            sg.set_node_range(min_objects=3, max_objects=3,
                              min_temp_nodes=number_of_temp_nodes, max_temp_nodes=number_of_temp_nodes,
                              min_states=2, max_states=4)
            sg.set_connection_ranges(min_edges_per_object=2, max_edges_per_object=3, min_percent_inter=0.5,
                                     max_percent_inter=1.0)

            discovery_algorithms = set()

            state_change_probabilities = [0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
            for state_change_probability in state_change_probabilities:
                print('state_change_probability: ' + str(state_change_probability) + '...')
                L().log.info('state_change_probability: ' + str(state_change_probability) + '...')

                sg.set_state_change_probability(min_probability=state_change_probability,
                                                max_probability=state_change_probability)

                eval_results.update({state_change_probability: dict()})
                metrics_dict = dict((metric, []) for metric in metrics)

                # ---------------------------------------------------
                #   RUN Structure Discovery several times
                # ---------------------------------------------------
                for iteration in range(iterations):
                    print('iteration: ' + str(iteration) + '...')
                    L().log.info('iteration: ' + str(iteration) + '...')

                    # SAMPLE DATA
                    models, specifications = sg.run_next_testcase()
                    in_seq = models[sg.reference_model.__name__].randomsample(sample_size, {})
                    sequences = sequences_to_intervals(in_seq, models[sg.reference_model.__name__].Vdata, False)[0]

                    additional_infos = dict()
                    additional_infos[sg.reference_model.__name__] = {'execution_time': 0.0, 'data': None}

                    # ---------------------------------------------------
                    #   Discovery Algorithm
                    # ---------------------------------------------------
                    for sd_name, sd in get_structure_discovery_algorithms():

                        # LIMITATIONS DUE TO RUNTIME PROBLEMS
                        # TODO: run all algorithms for all networks on a better hardware
                        if str.startswith(sd_name, 'Astar') and 3 * number_of_temp_nodes > 16:
                            print('Network too large for the A* algorithm.')
                            continue

                        discovery_algorithms.add(sd_name)
                        if sd_name not in eval_results.get(state_change_probability):
                            eval_results.get(state_change_probability).update({sd_name: copy.deepcopy(metrics_dict)})

                        model_name = sd_name + ' (' + str(iteration) + ')'
                        L().log.info('----------------------------------------------------------')
                        print('Run approach ' + model_name + '.')
                        L().log.info('Run approach ' + model_name + '.')

                        ping = clock()
                        nodes, edges = sd.discover_structure(sequences)
                        L().log.info('Nodes: ' + str(nodes))
                        L().log.info('Edges: ' + str(edges))
                        execution_time = clock() - ping
                        additional_infos[model_name] = {'execution_time': execution_time, 'data': sd.data}
                        L().log.info('Execution time: ' + str(execution_time))
                        L().log.info('----------------------------------------------------------')

                        # create TSCBN
                        skel = GraphSkeleton()
                        skel.V = nodes
                        skel.E = edges
                        skel.toporder()
                        model = TSCBN("", skel, models[sg.reference_model.__name__].Vdata, unempty=True,
                                      forbid_never=True, discrete_only=True)

                        # ----------------------------------------------------------------------------------------
                        #       EVALUATION
                        # ----------------------------------------------------------------------------------------
                        eval_result = ev.evaluate(model_dict={model_name: model},
                                                  reference=models[sg.reference_model.__name__],
                                                  additional_infos=additional_infos)
                        ev.print_eval_results(eval_results=eval_result, specs=specifications, to_csv=True)
                        for metric, value in eval_result[model_name].items():
                            eval_results[state_change_probability][sd_name][metric].append(value)
            experiment_name = 'SCP_Experiment_Sig_3_TmpNodes_' + str(number_of_temp_nodes)
            relevant_metrics = ["num-add-edges", "num-del-edges", "shd", "num-add-edges-skel", "num-del-edges-skel",
                                "shd-skel", "kld", "execution-time"]
            write_pgfplots_data(experiment_name, eval_results, relevant_metrics, discovery_algorithms,
                                state_change_probabilities, 'state_change_probability', target_path)
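For orientation, the nested structure this experiment accumulates is x-axis value -> discovery algorithm -> metric -> one value per iteration; write_pgfplots_data presumably aggregates those lists per x-axis point. A sketch with made-up numbers:

import numpy as np

eval_results = {
    3: {"SBTreeDiscoverer_BIC_TH_0.5": {"shd": [4, 6, 5]}},
    4: {"SBTreeDiscoverer_BIC_TH_0.5": {"shd": [7, 8, 6]}},
}

# e.g. the mean SHD per number_of_temp_nodes for one algorithm:
means = {x: float(np.mean(algs["SBTreeDiscoverer_BIC_TH_0.5"]["shd"]))
         for x, algs in eval_results.items()}
print(means)  # {3: 5.0, 4: 7.0}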
Code example #9
def experiment_discovery(
        _run, approach, sample_size, iterations, min_per_object_gap,
        max_per_object_gap, temporal_variance, dbn_tolerance, sc_probability,
        edges_per_object, inter_edge_percent, number_of_signals,
        number_of_temp_nodes, sb_min_out_degree, sb_k_infrequent, sb_score,
        sb_max_time_difference, pc_min_out_degree, pc_k_infrequent, pc_alpha,
        pc_max_time_difference, pcd_alpha, pcd_max_reach, astar_score,
        ghc_score, ghc_tabu_length, novel_filtering, novel_k_infrequent,
        novel_alpha, novel_draw_it, novel_max_reach, novel_min_out_degree,
        pc_chi):

    # ----------------------------------------------------------------------------------------
    #      Setup
    # ----------------------------------------------------------------------------------------
    if edges_per_object >= number_of_signals:
        return
    # Generator Setup
    sg = initialize_generator(min_per_object_gap, max_per_object_gap,
                              temporal_variance, dbn_tolerance, sc_probability,
                              edges_per_object, inter_edge_percent,
                              number_of_signals, number_of_temp_nodes)
    # SD Approach
    sd = get_sd_approach(approach, sb_min_out_degree, sb_k_infrequent,
                         sb_score, sb_max_time_difference, pc_min_out_degree,
                         pc_k_infrequent, pc_alpha, pc_max_time_difference,
                         pcd_alpha, pcd_max_reach, astar_score, ghc_score,
                         ghc_tabu_length, novel_filtering, novel_k_infrequent,
                         novel_alpha, novel_draw_it, novel_min_out_degree,
                         novel_max_reach, pc_chi)
    # Evaluation Metrics
    ev = initialize_evaluator()

    # ----------------------------------------------------------------------------------------
    #      Run Experiment
    # ----------------------------------------------------------------------------------------
    eval_results = dict()
    for iteration in range(iterations):
        print('iteration: ' + str(iteration + 1) + '...')

        # SAMPLE DATA
        models, specifications = sg.run_next_testcase()
        print("NUMBER INTER EDGES: %s" % str(
            len([
                e for e in models["TSCBNStructureModel"].E
                if e[0].split("_")[0] != e[1].split("_")[0]
                and not str.startswith(e[1], "dL_")
            ])))
        in_seq = models[sg.reference_model.__name__].randomsample(
            sample_size, {})
        sequences = sequences_to_intervals(
            in_seq, models[sg.reference_model.__name__].Vdata, False)[0]

        additional_infos = dict()
        additional_infos[sg.reference_model.__name__] = {
            'execution_time': 0.0,
            'data': None
        }

        # LIMITATIONS DUE TO RUNTIME PROBLEMS
        if hw_limitation_reached(approach, number_of_signals,
                                 number_of_temp_nodes):
            continue

        # RUN DISCOVERY
        ping = clock()
        nodes, edges = sd.discover_structure(sequences)
        execution_time = clock() - ping

        # CREATE GROUND TRUTH TSCBN
        skel = GraphSkeleton()
        skel.V = nodes
        skel.E = edges
        skel.toporder()
        model = TSCBN("",
                      skel,
                      models[sg.reference_model.__name__].Vdata,
                      unempty=True,
                      forbid_never=True,
                      discrete_only=True)

        # ----------------------------------------------------------------------------------------
        #       Run Evaluation current Iteration
        # ----------------------------------------------------------------------------------------
        model_name = str(approach) + ' (' + str(iteration) + ')'
        additional_infos[model_name] = {
            'execution_time': execution_time,
            'data': sd.data
        }
        eval_result = ev.evaluate(
            model_dict={model_name: model},
            reference=models[sg.reference_model.__name__],
            additional_infos=additional_infos)
        #ev.print_eval_results(eval_results=eval_result, specs=specifications, to_csv=True)
        for metric, value in eval_result[model_name].items():
            if metric not in eval_results:
                eval_results[metric] = []
            eval_results[metric].append(value)
            try:
                float(value)
                _run.log_scalar(metric, value)
            except:
                pass

    # ----------------------------------------------------------------------------------------
    #       Run Evaluation average over all Iteration
    # ----------------------------------------------------------------------------------------
    for metric in eval_results:
        try:
            avg = float(np.mean(eval_results[metric]))  # fails for non-numeric metrics
            _run.log_scalar("avg_%s" % metric, avg)
        except (TypeError, ValueError):
            pass
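The helper hw_limitation_reached is not shown here; a plausible stand-in, mirroring the size guards from the comparison experiments in the previous example (thresholds taken from there, the helper's real logic may differ):

def hw_limitation_reached(approach, number_of_signals, number_of_temp_nodes):
    # Hypothetical reconstruction based on the guards above: skip approaches
    # whose runtime explodes with the number of nodes.
    n = number_of_signals * number_of_temp_nodes
    if approach.startswith('Astar') and n > 16:
        return True
    if approach.startswith('PC') and n > 24:
        return True
    if approach.startswith(('sbPTM', 'cbPTM')) and n > 30:
        return True
    return False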