Example #1
    def create(self, network_factory: bayesianpy.network.NetworkFactory):
        network = network_factory.create()
        #builder.create_cluster_variable(network, 5)

        if not dk.empty(self._continuous):
            for c_name in self._continuous.columns:
                builder.create_continuous_variable(network, c_name)

        if not dk.empty(self._discrete):
            for d_name in self._discrete.columns:
                builder.create_discrete_variable(network, self._discrete, d_name, blanks=self._blanks)

        network = bayesianpy.network.remove_single_state_nodes(network)

        return network
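
Example #1 only declares one node per column and then prunes degenerate nodes; nothing is linked yet. Below is a minimal sketch, assuming a toy dataframe split into continuous and discrete column subsets (the dataframe and column names are illustrative, not from the example), of driving the same builder calls outside a template class:

import logging
import pandas as pd
import bayesianpy.jni
import bayesianpy.network
from bayesianpy.network import Builder as builder

logger = logging.getLogger(__name__)
bayesianpy.jni.attach(logger)

# hypothetical data: one continuous and one discrete column
df = pd.DataFrame({'size': [1.2, 3.4, 2.2], 'task': ['pick', 'place', 'pick']})
continuous = df[['size']]
discrete = df[['task']]

network = bayesianpy.network.create_network()
for c_name in continuous.columns:
    builder.create_continuous_variable(network, c_name)
for d_name in discrete.columns:
    builder.create_discrete_variable(network, discrete, d_name)

# drop any node that ended up with a single state, as the template does
network = bayesianpy.network.remove_single_state_nodes(network)
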
Example #2
    def create(self, network_factory):
        network = network_factory.create()
        cluster = builder.try_get_node(network, "Cluster")
        if cluster is None:
            cluster = builder.create_cluster_variable(
                network,
                self._latent_states,
                variable_name=self._latent_variable_name)

        if not dk.empty(self._continuous):
            for c_name in self._continuous.columns:
                self._logger.info("Pre-processing {} column".format(c_name))
                c = builder.create_continuous_variable(network, c_name)
                try:
                    builder.create_link(network, cluster, c)
                except ValueError as e:
                    self._logger.warn(e)

        if not dk.empty(self._discrete):
            for d_name in self._discrete.columns:
                if d_name in self._discrete_states:
                    states = self._discrete_states[str(d_name)]
                else:
                    states = dk.compute(self._discrete[str(
                        d_name)].dropna().unique()).tolist()

                try:
                    c = builder.create_discrete_variable(
                        network, self._discrete, str(d_name), states)

                    builder.create_link(network, cluster, c)
                except BaseException as e:
                    self._logger.warn(e)

        return network
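
Example #2 attaches every manifest variable to a shared latent node, giving a naive-Bayes-style mixture structure, and try_get_node lets it reuse an existing "Cluster" node rather than create a duplicate. A hedged sketch of that reuse-or-create pattern, building on the network and logger from the sketch after Example #1 (the state count and node names are illustrative):

cluster = builder.try_get_node(network, "Cluster")
if cluster is None:
    # no latent node yet: create one with, say, 3 states
    cluster = builder.create_cluster_variable(network, 3, variable_name="Cluster")

# link each manifest variable to the latent cluster, logging duplicate-link errors
for node_name in ['size', 'task']:
    child = builder.try_get_node(network, node_name)
    if child is not None:
        try:
            builder.create_link(network, cluster, child)
        except ValueError as e:
            logger.warning(e)
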
Example #3
    def create(self, network_factory):
        network = network_factory.create()

        if not dk.empty(self._continuous):
            for c_name in self._continuous.columns:
                c = builder.create_continuous_variable(network, c_name)

        if not dk.empty(self._discrete):
            for d_name in self._discrete.columns:
                if d_name in self._discrete_states:
                    states = self._discrete_states[d_name]
                else:
                    states = dk.compute(
                        self._discrete[d_name].dropna().unique()).tolist()

                try:
                    c = builder.create_discrete_variable(
                        network, self._discrete, d_name, states)
                except BaseException as e:
                    self._logger.warn(e)

        parent_node = builder.try_get_node(network, self._parent_node)
        if parent_node is None:
            raise ValueError("Parent node: {} not recognised".format(
                self._parent_node))

        for node in network.getNodes():
            if node == parent_node:
                continue
            builder.create_link(network, parent_node, node)

        return network
def main():

    logger = logging.getLogger()
    logger.addHandler(logging.StreamHandler())
    logger.setLevel(logging.INFO)

    bayesianpy.jni.attach(logger)

    db_folder = bayesianpy.utils.get_path_to_parent_dir(__file__)
    iris = pd.read_csv(os.path.join(db_folder, "data/iris.csv"),
                       index_col=False)

    network = bayesianpy.network.create_network()
    cluster = builder.create_cluster_variable(network, 4)
    node = builder.create_multivariate_continuous_node(
        network,
        iris.drop('iris_class', axis=1).columns.tolist(), "joint")
    builder.create_link(network, cluster, node)

    class_variable = builder.create_discrete_variable(
        network, iris, 'iris_class', iris['iris_class'].unique())
    builder.create_link(network, cluster, class_variable)

    head_variables = [
        'sepal_length', 'sepal_width', 'petal_length', 'petal_width'
    ]

    with bayesianpy.data.DataSet(iris, db_folder, logger) as dataset:
        model = bayesianpy.model.NetworkModel(network, logger)
        model.train(dataset)

        queries = [
            bayesianpy.model.QueryConditionalJointProbability(
                head_variables=[v], tail_variables=['iris_class'])
            for v in head_variables
        ]

        (engine, _, _) = bayesianpy.model.InferenceEngine(network).create()
        query = bayesianpy.model.SingleQuery(network, engine, logger)
        results = query.query(queries, aslist=True)
        jd = bayesianpy.visual.JointDistribution()
        fig = plt.figure(figsize=(10, 10))

        for i, r in enumerate(list(results)):
            ax = fig.add_subplot(2, 2, i + 1)
            jd.plot_distribution_with_variance(ax, iris,
                                               queries[i].get_head_variables(),
                                               r)

        plt.show()
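
In QueryConditionalJointProbability the head variables are the ones whose distribution is returned and the tail variables form the conditioning set, so each panel above shows one feature conditioned on iris_class. To condition on the latent cluster instead, as Example #5 does, presumably only the tail list needs to change; a sketch of that variation, reusing head_variables from the example:

queries = [
    bayesianpy.model.QueryConditionalJointProbability(
        head_variables=[v], tail_variables=['Cluster'])
    for v in head_variables
]
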
Example #5
    def create(self, network_factory: bayesianpy.network.NetworkFactory):
        network = network_factory.create()
        cluster = builder.create_cluster_variable(network, self._latent_states)

        if not dk.empty(self._continuous):
            for c_name in self._continuous.columns:
                c = builder.create_discretised_variable(network, self._continuous, c_name, bin_count=self._bin_count,
                                                        mode=self._binning_mode, zero_crossing=self._zero_crossing)

                builder.create_link(network, cluster, c)

        if not dk.empty(self._discrete):
            for d_name in self._discrete.columns:
                states = dk.compute(self._discrete[d_name].dropna().unique())
                c = builder.create_discrete_variable(network, self._discrete, d_name, states)
                builder.create_link(network, cluster, c)

        return network
def main():

    logger = logging.getLogger()
    logger.addHandler(logging.StreamHandler())
    logger.setLevel(logging.INFO)

    bayesianpy.jni.attach(logger)

    db_folder = bayesianpy.utils.get_path_to_parent_dir(__file__)
    iris = pd.read_csv(os.path.join(db_folder, "data/iris.csv"),
                       index_col=False)

    network = bayesianpy.network.create_network()
    cluster = builder.create_cluster_variable(network, 4)
    node = builder.create_multivariate_continuous_node(
        network,
        iris.drop('iris_class', axis=1).columns.tolist(), "joint")
    builder.create_link(network, cluster, node)

    class_variable = builder.create_discrete_variable(
        network, iris, 'iris_class', iris['iris_class'].unique())
    builder.create_link(network, cluster, class_variable)

    jd = bayesianpy.visual.JointDistribution()

    def plot(head_variables, results):

        fig = plt.figure(figsize=(10, 10))
        n = len(head_variables) - 1
        total = n * (n + 1) // 2  # number of unordered variable pairs

        k = 1
        for i, hv in enumerate(head_variables):
            for j in range(i + 1, len(head_variables)):
                ax = fig.add_subplot((total + 1) // 2, 2, k)
                jd.plot_distribution_with_covariance(
                    ax, iris, (head_variables[i], head_variables[j]), results)

                k += 1
        plt.show()

    with bayesianpy.data.DataSet(iris, db_folder, logger) as dataset:
        model = bayesianpy.model.NetworkModel(network, logger)
        model.train(dataset)

        head_variables = [
            'sepal_length', 'sepal_width', 'petal_length', 'petal_width'
        ]

        query_type_class = bayesianpy.model.QueryConditionalJointProbability(
            head_variables=head_variables,
            tail_variables=['iris_class', 'Cluster'])

        (engine, _, _) = bayesianpy.model.InferenceEngine(network).create()
        query = bayesianpy.model.Query(network, engine, logger)
        results_class = query.execute([query_type_class])

        plot(head_variables, results_class)

        query_type_cluster = bayesianpy.model.QueryConditionalJointProbability(
            head_variables=head_variables, tail_variables=['Cluster'])

        results_cluster = query.execute([query_type_cluster])

        plot(head_variables, results_cluster)
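
The nested loop in plot() visits every unordered pair of the four head variables, i.e. C(4,2) = 6 covariance panels arranged on a 3 x 2 grid. An equivalent sketch of the pair enumeration using itertools.combinations (not part of the example):

import itertools

pairs = list(itertools.combinations(head_variables, 2))  # 6 pairs for 4 variables
rows = (len(pairs) + 1) // 2                             # two panels per row
# pair k (1-based) is drawn with fig.add_subplot(rows, 2, k)
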
import logging

import bayesianpy.data
import bayesianpy.jni
import bayesianpy.model
import bayesianpy.network
import bayesianpy.utils
from bayesianpy.network import Builder as builder

logger = logging.getLogger(__name__)

# attach the JVM bridge before building the network, as the other examples do
bayesianpy.jni.attach(logger)

nt = bayesianpy.network.create_network()

# where df is your dataframe
task = builder.create_discrete_variable(nt, df, 'task')

size = builder.create_continuous_variable(nt, 'size')
grasp_pose = builder.create_continuous_variable(nt, 'GraspPose')

builder.create_link(nt, size, grasp_pose)
builder.create_link(nt, task, grasp_pose)

for v in ['fill level', 'object shape', 'side graspable']:
    va = builder.create_discrete_variable(nt, df, v)
    builder.create_link(nt, va, grasp_pose)
    builder.create_link(nt, task, va)

# write df to data store
with bayesianpy.data.DataSet(df, bayesianpy.utils.get_path_to_parent_dir(__file__), logger) as dataset:
    model = bayesianpy.model.NetworkModel(nt, logger)
    model.train(dataset)

    # to query model multi-threaded
    results = model.batch_query(dataset, [bayesianpy.model.QueryModelStatistics()], append_to_df=False)
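
batch_query runs the supplied queries over every row of the data store; with append_to_df=False it presumably returns the query results without appending them to df. For one-record-at-a-time queries against ad-hoc evidence, the same trained model can be queried with the pattern used in Example #8 below; a hedged sketch reusing nt, df and logger from this snippet, with 'task' treated as the target:

reader = bayesianpy.data.DataFrameReader(df.drop('task', axis=1))
inference = bayesianpy.model.InferenceEngine(nt).create_engine()
evidence = bayesianpy.model.Evidence(nt, inference)
single = bayesianpy.model.SingleQuery(nt, inference, logger)

while reader.read():
    # most likely 'task' state given the other columns of the current row
    result = single.query([bayesianpy.model.QueryMostLikelyState('task')],
                          evidence=evidence.apply(reader.to_dict()))
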

Example #8
def main():

    logger = logging.getLogger()
    logger.addHandler(logging.StreamHandler())
    logger.setLevel(logging.INFO)

    bayesianpy.jni.attach(logger)

    db_folder = bayesianpy.utils.get_path_to_parent_dir(__file__)
    iris = pd.read_csv(os.path.join(db_folder, "data/iris.csv"),
                       index_col=False)

    network = bayesianpy.network.create_network()
    num_clusters = 3
    cluster = builder.create_cluster_variable(network, num_clusters)
    node = builder.create_multivariate_continuous_node(
        network,
        iris.drop('iris_class', axis=1).columns.tolist(), "joint")
    builder.create_link(network, cluster, node)

    class_variable = builder.create_discrete_variable(
        network, iris, 'iris_class', iris['iris_class'].unique())
    builder.create_link(network, cluster, class_variable)

    train, test = train_test_split(iris, test_size=0.7)

    # train the model and query the most likely states and probability of each latent state.
    with bayesianpy.data.DataSet(iris, db_folder, logger) as dataset:
        model = bayesianpy.model.NetworkModel(network, logger)
        model.train(dataset.subset(train.index.tolist()))

        test_subset = dataset.subset(test.index.tolist())

        results = model.batch_query(
            test_subset,
            # creates columns Cluster$$Cluster0, Cluster$$Cluster1,
            # Cluster$$Cluster2, as
            # suffix is set to an empty string.
            [
                bayesianpy.model.QueryStateProbability("Cluster", suffix=""),
                # creates column 'iris_class_maxlikelihood'
                bayesianpy.model.QueryMostLikelyState("iris_class"),
                # creates column 'Cluster_maxlikelihood'
                bayesianpy.model.QueryMostLikelyState("Cluster")
            ])

    cluster_accuracy = {}
    # get a list of cluster accuracies, using the Bayes Server Confusion matrix class
    # weighted by the Cluster accuracy.
    with bayesianpy.data.DataSet(results, db_folder, logger) as resultset:
        for c in range(num_clusters):
            matrix = bayesianpy.jni.bayesServerAnalysis()\
                .ConfusionMatrix.create(resultset.create_data_reader_command(), "iris_class",
                                        "iris_class_maxlikelihood", "Cluster$$Cluster{}".format(c))
            cluster_accuracy.update(
                {'Cluster{}'.format(c): matrix.getAccuracy()})

    # generate samples from the trained model, to give us some additional testing data.
    samples = bayesianpy.model.Sampling(network).sample(num_samples=20).drop(
        ["Cluster", "iris_class"], axis=1)
    reader = bayesianpy.data.DataFrameReader(samples)
    inference = bayesianpy.model.InferenceEngine(network).create_engine()
    evidence = bayesianpy.model.Evidence(network, inference)
    query = bayesianpy.model.SingleQuery(network, inference, logger)
    query_type = [bayesianpy.model.QueryStateProbability('Cluster', suffix="")]

    # query the expected Cluster membership, and generate a wrapper for
    # comparing the values, weighted by cluster membership.
    while reader.read():
        result = query.query(query_type,
                             evidence=evidence.apply(reader.to_dict()))
        cv_results = []
        for i, (key, value) in enumerate(result.items()):
            n = bayesianpy.network.Discrete.fromstring(key)
            weighting = cluster_accuracy[n.state]
            cv_results.append(bayesianpy.jni.bayesServerAnalysis().
                              DefaultCrossValidationTestResult(
                                  jp.JDouble(weighting),
                                  jp.JObject(value, jp.java.lang.Object),
                                  jp.java.lang.Double(jp.JDouble(value))))

        score = bayesianpy.jni.bayesServerAnalysis().CrossValidation.combine(
            jp.java.util.Arrays.asList(cv_results),
            bayesianpy.jni.bayesServerAnalysis().CrossValidationCombineMethod.
            WEIGHTED_AVERAGE)

        # append the score on to the existing dataframe
        samples.loc[reader.get_index(), 'score'] = score

    variables = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']

    cmap = plt.cm.get_cmap('Blues')
    fig = plt.figure(figsize=(10, 10))
    k = 1
    # plot!
    for i, v in enumerate(variables):
        for j in range(i + 1, len(variables)):
            v1 = variables[j]
            ax = fig.add_subplot(3, 2, k)
            ax.set_title("{} vs {}".format(v, v1))
            ax.scatter(x=iris[v].tolist(),
                       y=iris[v1].tolist(),
                       facecolors='none',
                       alpha=0.1)
            h = ax.scatter(x=samples[v].tolist(),
                           y=samples[v1].tolist(),
                           c=samples['score'].tolist(),
                           vmin=samples.score.min(),
                           vmax=samples.score.max(),
                           cmap=cmap)
            k += 1

    fig.subplots_adjust(right=0.8)
    cbar_ax = fig.add_axes([0.85, 0.15, 0.05, 0.7])
    fig.colorbar(h, cax=cbar_ax)
    plt.show()