Example no. 1
p = Preprocessor([('encoder', encoder)])
discretized_data, est = p.apply(h)
info = p.info

# --------------------- VALIDATION TEST-------------------------------
nodes_type_mixed = gru.nodes_types(h)
columns = [
    col for col in h.columns.to_list()
    if nodes_type_mixed[col] in ['disc', 'disc_num']
]  # GET ONLY DISCRETE
discrete_data = h[columns]

discretized_data, est = p.apply(discrete_data)  # warning
info = p.info

bn = Networks.HybridBN()
bn.add_nodes(descriptor=info)  # error
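# (presumably the error occurs because a HybridBN expects a descriptor with both
# discrete and continuous node types, while this descriptor is discrete-only)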
# ------------------------------
p = Preprocessor([('encoder', encoder), ('discretizer', discretizer)])
discretized_data, est = p.apply(h)
info = p.info
# ---------------------------------------
print("has_logit=False, use_mixture=False")
bn = Networks.HybridBN()
bn.add_nodes(descriptor=info)

for node in bn.nodes:
    print(f"{node.name}: {node.type}")  # only gaussian and discrete nodes
print("#" * 150)

bn.add_edges(data=discretized_data, optimizer='HC', scoring_function=('MI', ))
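
# Hedged continuation (not part of the original snippet): after structure learning one
# would typically inspect the node types and fit parameters on the raw frame `h`,
# mirroring the calls used in the later examples.
bn.get_info(as_df=False)
bn.fit_parameters(data=h)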
Example no. 2
import pandas as pd
from sklearn import preprocessing as pp
import bamt.Networks as Networks
from bamt.Preprocessors import Preprocessor  # assumed import path; may vary across bamt versions

vk_data = pd.read_csv(r"data\real data\vk_data.csv").sample(150)

encoder = pp.LabelEncoder()
discretizer = pp.KBinsDiscretizer(n_bins=5,
                                  encode='ordinal',
                                  strategy='uniform')

p = Preprocessor([('encoder', encoder)])
discretized_data, est = p.apply(vk_data)
info = p.info

bn = Networks.HybridBN(has_logit=False, use_mixture=False)
bn.add_nodes(descriptor=info)
params = {
    "init_nodes": ["sex", "has_pets", "is_parent", "relation", "tr_per_month"],
    "init_edges": [("age", "mean_tr"), ("sex", "mean_tr"), ("sex", "has_pets"),
                   ("is_parent", "has_pets"), ("has_pets", "median_tr"),
                   ("is_driver", "tr_per_month"),
                   ("tr_per_month", "median_tr"), ("tr_per_month", "relation")]
}

bn.add_edges(data=discretized_data,
             optimizer='HC',
             scoring_function=('MI', ),
             params=params)
bn.fit_parameters(data=vk_data)
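
# Hedged follow-up (assumption, not in the original snippet): once parameters are
# fitted, synthetic records can be drawn for a quick sanity check, mirroring the
# sampling calls in the later examples; `sample` is assumed to return a DataFrame
# by default, as the `as_df` flag elsewhere suggests.
synth_vk_data = bn.sample(50)
print(synth_vk_data.head())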
Example no. 3
print(h.describe())
print("-----")
p2 = time.time()
print(f"Time elapsed for preparing data: {p2 - p1}")

encoder = pp.LabelEncoder()
discretizer = pp.KBinsDiscretizer(n_bins=5, encode='ordinal', strategy='quantile')

p = Preprocessor([('encoder', encoder), ('discretizer', discretizer)])

# -----------
discrete_data, est = p.apply(h)
info = p.info

bn = Networks.HybridBN(has_logit=True)  # all may vary
bn.add_nodes(descriptor=info)
bn.add_edges(data=discrete_data, optimizer='HC', scoring_function=('MI',))

bn.get_info(as_df=False)
t1 = time.time()
bn.fit_parameters(data=h)
t2 = time.time()
print(f'PL elapsed: {t2 - t1}')

columns = ['Lithology', 'Structural setting', 'Porosity', 'Depth']
validY = h[columns].dropna()
validX = h.drop(columns, axis=1).dropna()

time_1 = time.time()
pred_param = bn.predict(validX, parall_count=3)
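# Hedged completion of this snippet (assumptions, not in the original): report the
# prediction time and frame the returned dict of columns, as the discrete example
# below does, so it can be compared against validY.
time_2 = time.time()
print(f"Prediction elapsed: {time_2 - time_1}")
pred_df = pd.DataFrame.from_dict(pred_param, orient='columns')
print(pred_df.head())
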
cont_test_data = cont_data[cont_data.columns[:-1]]
cont_target = cont_data[cont_data.columns[-1]]
disc_test_data = disc_data[disc_data.columns[:-1]]
disc_target = disc_data[disc_data.columns[-1]]
hybrid_test_data = hybrid_data[hybrid_data.columns[:-1]]
hybrid_target = hybrid_data[hybrid_data.columns[-1]]

encoder = pp.LabelEncoder()
discretizer = pp.KBinsDiscretizer(n_bins=5,
                                  encode='ordinal',
                                  strategy='uniform')
p = Preprocessor([('encoder', encoder), ('discretizer', discretizer)])

# Discrete pipeline
discretized_data, _ = p.apply(disc_data)
disc_bn = Networks.DiscreteBN()
info = p.info
disc_bn.add_nodes(info)
disc_bn.add_edges(data=discretized_data, scoring_function=('K2', K2Score))
disc_bn.fit_parameters(data=disc_data)
disc_bn.calculate_weights(discretized_data)
disc_predicted_values = disc_bn.predict(test=disc_test_data)
disc_predicted_values = pd.DataFrame.from_dict(disc_predicted_values,
                                               orient='columns')
synth_disc_data = disc_bn.sample(50)

disc_bn.save('./disc_bn.json')
disc_bn2 = Networks.DiscreteBN()
disc_bn2.load('./disc_bn.json')
synth_disc_data2 = disc_bn2.sample(50)
# print(disc_bn.weights)
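
# Hedged sketch of the analogous continuous pipeline (assumptions: `cont_data` holds
# only continuous columns, so only the discretizer step is needed, as in a later
# example). It mirrors the DiscreteBN flow above using Networks.ContinuousBN.
cont_p = Preprocessor([('discretizer', discretizer)])
cont_discretized, _ = cont_p.apply(cont_data)
cont_bn = Networks.ContinuousBN(use_mixture=True)
cont_bn.add_nodes(descriptor=cont_p.info)
cont_bn.add_edges(data=cont_discretized, optimizer='HC', scoring_function=('MI',))
cont_bn.fit_parameters(data=cont_data)
cont_predicted_values = cont_bn.predict(test=cont_test_data)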
Example no. 5
print("-----")
p2 = time.time()
print(f"Time elapsed for preparing data: {p2 - p1}")

encoder = pp.LabelEncoder()
discretizer = pp.KBinsDiscretizer(n_bins=5,
                                  encode='ordinal',
                                  strategy='quantile')

p = Preprocessor([('encoder', encoder), ('discretizer', discretizer)])

# -----------
discrete_data, est = p.apply(h)
info = p.info

bn = Networks.HybridBN(use_mixture=False, has_logit=True)  # all may vary
bn.add_nodes(descriptor=info)
bn.add_edges(data=discrete_data,
             optimizer='HC',
             scoring_function=('MI', ),
             classifier=RandomForestClassifier())

bn.get_info(as_df=False)
t1 = time.time()
bn.fit_parameters(data=h)
t2 = time.time()
print(f'PL elapsed: {t2 - t1}')

# for num, el in enumerate(bn.sample(10, as_df=False), 1):
#     print('\n', num)
#     for name, val in el.items():
#         print(name, val)
encoder = pp.LabelEncoder()
discretizer = pp.KBinsDiscretizer(n_bins=5,
                                  encode='ordinal',
                                  strategy='uniform')

p = Preprocessor([('encoder', encoder), ('discretizer', discretizer)])

discretized_data, est = p.apply(vk_data)
info = p.info

# Make some errors
info['types']['relation'] = 'unknown'
info['types']['sex'] = 'helicopter'

bn = Networks.HybridBN()
bn.add_nodes(descriptor=info)
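# (presumably add_nodes reports the unrecognized types 'unknown' and 'helicopter'
# and rejects or skips those columns rather than building nodes for them)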

# ----------

bn.set_nodes(['A node'])
print(bn.nodes == [])


class MyNode:
    def __repr__(self):
        return 'MyNode'
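
# Hedged guess at the intended use of the dummy class (assumption, not in the
# original): instances that are not bamt nodes should be rejected just like the
# string above, leaving the node list empty.
bn.set_nodes([MyNode()])
print(bn.nodes == [])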

p2 = time.time()
print(f"Time elapsed for uploading data: {p2 - p1}")

encoder = pp.LabelEncoder()
discretizer = pp.KBinsDiscretizer(n_bins=5,
                                  encode='ordinal',
                                  strategy='uniform')

p = Preprocessor([('encoder', encoder), ('discretizer', discretizer)])

nodes_type_mixed = gru.nodes_types(data)

discretized_data, est = p.apply(data)  # info
info = p.info

bn = Networks.DiscreteBN()
bn.add_nodes(descriptor=info)

bn.add_edges(data=discretized_data,
             optimizer='HC',
             scoring_function=('K2', K2Score))
bn.get_info()
t1 = time.time()
bn.fit_parameters(data=data)
t2 = time.time()
print(f'PL elapsed: {t2 - t1}')
for node, d in bn.distributions.items():
    print(node)
    for param, value in d.items():
        print(f"{param}:{value}")
# for num, el in enumerate(bn.sample(20), 1):
#     print(num, el)
p2 = time.time()
print(f"Time elapsed for uploading data: {p2 - p1}")

discretizer = pp.KBinsDiscretizer(n_bins=5, encode='ordinal', strategy='quantile')

p = Preprocessor([('discretizer', discretizer)])

# -----------
nodes_type_mixed = p.get_nodes_types(h)
columns = [
    col for col in h.columns.to_list()
    if not nodes_type_mixed[col] in ['disc', 'disc_num']
]  # GET ONLY CONT
discrete_data = h[columns]

discretized_data, est = p.apply(discrete_data)  # info
info = p.info

bn = Networks.ContinuousBN(use_mixture=True)  # use_mixture = False as well

bn.add_nodes(descriptor=info)

bn.add_edges(data=discretized_data, optimizer='HC', scoring_function=('MI',))
bn.get_info(as_df=False)
t1 = time.time()
bn.fit_parameters(data=h)
t2 = time.time()
print(f'PL elapsed: {t2 - t1}')
# Without async: 0.00699925422668457
# With: 0.0019998550415039062
print('Improvement: %d' % (0.00699925422668457 // 0.0019998550415039062))
# After rebuilding: 0.0

p2 = time.time()
print(f"Time elapsed for uploading data: {p2 - p1}")

encoder = pp.LabelEncoder()
discretizer = pp.KBinsDiscretizer(n_bins=5,
                                  encode='ordinal',
                                  strategy='uniform')

p = Preprocessor([('encoder', encoder), ('discretizer', discretizer)])

print("#" * 1000)
discretized_data, est = p.apply(h)
info = p.info

bn = Networks.ContinuousBN()

bn.add_nodes(descriptor=info)  # Error
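# (presumably the error occurs because a ContinuousBN only accepts continuous node
# types, while this descriptor still contains the discrete columns of `h`)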

# -----------
nodes_type_mixed = p.get_nodes_types(h)
columns = [
    col for col in h.columns.to_list()
    if not nodes_type_mixed[col] in ['disc', 'disc_num']
]  # GET ONLY CONT
discrete_data = h[columns]

discretized_data, est = p.apply(discrete_data)  # info
info = p.info

bn = Networks.ContinuousBN()
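
# Hedged continuation (assumption, not in the original): with a continuous-only
# descriptor the nodes can now be added without the error above, following the same
# flow as the use_mixture example earlier.
bn.add_nodes(descriptor=info)
bn.add_edges(data=discretized_data, optimizer='HC', scoring_function=('MI',))
bn.fit_parameters(data=h)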
Example no. 10
hack_data = pd.read_csv("data/real data/hack_processed_with_rf.csv")[[
    'Tectonic regime', 'Period', 'Lithology', 'Structural setting', 'Gross',
    'Netpay', 'Porosity', 'Permeability', 'Depth'
]]

encoder = pp.LabelEncoder()
discretizer = pp.KBinsDiscretizer(n_bins=5,
                                  encode='ordinal',
                                  strategy='uniform')

p = Preprocessor([('encoder', encoder), ('discretizer', discretizer)])

discretized_data, est = p.apply(hack_data)

bn = Networks.HybridBN(use_mixture=True, has_logit=True)
info = p.info

bn.add_nodes(info)

structure = [("Tectonic regime", "Structural setting"), ("Gross", "Netpay"),
             ("Lithology", "Permeability")]

bn.set_structure(edges=structure)

bn.get_info(as_df=False)

with open("hack_p.json") as params:
    params = json.load(params)
    bn.set_parameters(params)
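
# Hedged check (assumption, not in the original): with the structure and parameters
# set manually, the network should be able to generate synthetic records, as in the
# earlier sampling examples.
synth_hack_data = bn.sample(50)
print(synth_hack_data.head())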
import pandas as pd
from sklearn import preprocessing as pp
import bamt.Networks as Networks
from bamt.Preprocessors import Preprocessor  # assumed import path; may vary across bamt versions
import json

hack_data = pd.read_csv("data/real data/hack_processed_with_rf.csv")[[
    'Tectonic regime', 'Period', 'Lithology', 'Structural setting', 'Gross',
    'Netpay', 'Porosity', 'Permeability', 'Depth'
]]

encoder = pp.LabelEncoder()
discretizer = pp.KBinsDiscretizer(n_bins=5,
                                  encode='ordinal',
                                  strategy='uniform')

p = Preprocessor([('encoder', encoder), ('discretizer', discretizer)])

discretized_data, est = p.apply(hack_data)
info = p.info

bn2 = Networks.HybridBN(use_mixture=True)
bn2.add_nodes(info)

with open("hack_p.json") as params:
    with open("hack_s.json") as structure:
        edges = json.load(structure)
        params = json.load(params)
        bn2.set_structure(edges=edges)

# bn2.get_info(as_df=False)
# bn2.plot("gg2.html")
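
# Hedged check (assumption, not in the original): sampling from the restored network
# confirms that the loaded structure and parameters are usable, mirroring the
# save/load round-trip in the discrete example above.
synth_hack_data2 = bn2.sample(50)
print(synth_hack_data2.head())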