Example #1
0
class TimeSampling:
    """Timing benchmarks for several sampling routines on the 'alarm' model.

    The ``setup`` / ``time_*`` naming and the ``timeout`` attribute look like
    an airspeed-velocity (asv) benchmark class -- TODO confirm against the
    benchmark runner in use.
    """

    # NOTE(review): presumably the per-benchmark time limit in seconds.
    timeout = 600.0

    def setup(self):
        """Load the example model and build the sampler shared by benchmarks."""
        self.model = get_example_model('alarm')
        self.s = BayesianModelSampling(self.model)

    def time_forward_sample(self):
        """Simulate 10,000 samples without evidence."""
        self.model.simulate(n_samples=int(1e4), show_progress=False)

    def time_rejection_sample(self):
        """Simulate 10,000 samples conditioned on two observed variables."""
        self.model.simulate(n_samples=int(1e4),
                            evidence={
                                "HISTORY": "TRUE",
                                "HR": "NORMAL"
                            },
                            show_progress=False)

    def time_likelihood_sample(self):
        """Draw 10,000 likelihood-weighted samples given the same evidence."""
        self.s.likelihood_weighted_sample(evidence=[("HISTORY", "TRUE"),
                                                    ("HR", "NORMAL")],
                                          size=int(1e4))

    def time_gibbs_sampling(self):
        """Draw 10,000 Gibbs samples from the model."""
        # The sampler used to be bound to ``gibbs_samples`` while the call
        # went through the undefined name ``gibbs_sampling`` (NameError).
        gibbs_sampler = GibbsSampling(model=self.model)
        gibbs_sampler.sample(size=int(1e4))
Example #2
0
def sampling(model, n=1000, verbose=3):
    """Draw ``n`` forward samples from the network stored in ``model``.

    Parameters
    ----------
    model : dict
        Must hold the network under the key ``'model'``; the type name of
        that object has to contain ``BayesianModel``.
    n : int, optional
        Number of samples to generate. Must be positive. Default is 1000.
    verbose : int, optional
        Verbosity level (0: NONE, 1: ERROR, 2: WARNING, 3: INFO (default),
        4: DEBUG, 5: TRACE). Not used by this function.

    Returns
    -------
    The result of ``forward_sample`` requested with
    ``return_type='dataframe'``.
    """
    assert n > 0, 'n must be 1 or larger'
    network = model['model']
    assert 'BayesianModel' in str(
        type(network)
    ), 'Model must contain DAG from BayesianModel. Note that <misarables> example does not include DAG.'

    # Forward sampling, see http://pgmpy.org/sampling.html
    sampler = BayesianModelSampling(network)
    return sampler.forward_sample(size=n, return_type='dataframe')
    def __init__(self, parameters=None):
        """Build the network, attach its CPDs and create a forward sampler.

        Parameters
        ----------
        parameters : dict, optional
            Forwarded to the parent initializer. Afterwards
            ``self.parameters`` is read for ``'nodes'``, ``'edges'`` and
            ``'conditional_probabilities'``.
        """
        super().__init__(parameters)

        # set up the network based on the parameters
        self.model = DBN()
        self.model.add_nodes_from(self.parameters['nodes'])
        self.model.add_edges_from(self.parameters['edges'])

        print(f'EDGES: {sorted(self.model.edges())}')

        # Accept a mapping of node_id -> values as well as an iterable of
        # (node_id, values) pairs; iterating a mapping directly would yield
        # only its keys and break the two-name unpacking below.
        probabilities = self.parameters['conditional_probabilities']
        if hasattr(probabilities, 'items'):
            probabilities = probabilities.items()

        # TODO -- add 'evidence' -- get from network?
        cpds = [
            TabularCPD(variable=node_id,
                       variable_card=len(values),
                       values=values,
                       evidence=[])
            for node_id, values in probabilities
        ]
        # add_cpds takes the CPDs as separate positional arguments.
        self.model.add_cpds(*cpds)

        # make an inference instance for sampling the model
        self.inference = BayesianModelSampling(self.model)

        # Draw a small sample right away; the result itself is not used.
        self.inference.forward_sample(size=2)
Example #4
0
def generate_datasets(networks, folder, nb_samples=2000):
    """Forward-sample every network and write each sample set to a CSV file.

    For each key ``name`` of ``networks`` the samples drawn from
    ``networks[name]`` are written to ``<folder>/datasets/<name>.csv``.

    Parameters
    ----------
    networks : indexable by the values it iterates over
        Maps a network name to the model to sample from.
    folder : str
        Directory that contains the ``datasets`` sub-directory.
    nb_samples : int, optional
        Number of samples drawn per network. Default is 2000.
    """
    for name in networks:
        csv_path = os.path.join(folder, 'datasets', name + '.csv')
        sampler = BayesianModelSampling(networks[name])
        sampler.forward_sample(size=nb_samples).to_csv(csv_path)
Example #5
0
def generate_time_series(
    sampler: BayesianModelSampling,
    length: int,
    labels: typing.List[str],
    seed: int = 42,
):
    """Generate a synthetic time series by chaining samples from ``sampler``.

    The first draw uses no evidence. Its ``_T`` columns become row one and
    its ``_T+1`` columns become row two. Every following draw is conditioned
    on the previous ``_T+1`` values, and its own ``_T+1`` values are appended
    as the next row.

    Parameters
    ----------
    sampler : BayesianModelSampling
        Sampler whose variables are named with ``_T`` / ``_T+1`` suffixes.
    length : int
        Requested number of rows. Two rows are always produced by the first
        draw, so values below 2 still yield two rows.
    labels : list of str
        Column names assigned to the returned frame.
    seed : int, optional
        Seed passed to the first (evidence-free) draw only.

    Returns
    -------
    pandas.DataFrame
        One row per time step, with columns renamed to ``labels``.
    """

    def _draw(**kwargs):
        # Suppress the sampler's console output, then sort the columns so
        # that the ``_T`` and ``_T+1`` slices line up position-wise.
        with io.capture_output():
            drawn = sampler.rejection_sample(**kwargs)
        return drawn.reindex(sorted(drawn.columns), axis=1)

    def _next_values(drawn):
        return drawn.filter(regex=r"_T\+1").iloc[0].values.tolist()

    pbar = notebook.tqdm(total=length)
    try:
        # When no evidence is provided, the function under-the-hood performs
        # forward sampling.
        sample = _draw(seed=seed)

        current = sample.filter(regex="_T$")
        columns = current.columns
        next_slice = _next_values(sample)

        # Collect one frame per row and concatenate once at the end; this
        # replaces DataFrame.append, which pandas 2.0 removed and which
        # copied the whole frame on every call.
        frames = [current, pd.DataFrame([next_slice], columns=columns)]
        pbar.update(2)

        for _ in range(2, length):
            evidence = [
                State(n, v) for n, v in zip(columns.values, next_slice)
            ]
            sample = _draw(evidence=evidence)
            next_slice = _next_values(sample)
            frames.append(pd.DataFrame([next_slice], columns=columns))
            pbar.update(1)
    finally:
        # Close the bar even when sampling fails part-way through.
        pbar.close()

    df_synth = pd.concat(frames, ignore_index=True)
    df_synth.columns = labels
    return df_synth
Example #6
0
    def sample(self, nb_sample=1):
        """Forward-sample ``nb_sample`` rows and return them as state values.

        The sampler returns the index of each state, so every column is
        mapped back to the actual value using the states reported by
        ``self.get_state_space`` for that column.
        """
        sampler = BayesianModelSampling(self.bn)
        drawn = sampler.forward_sample(size=nb_sample)

        # Replace state indices by the states themselves, column by column.
        for column in drawn.columns:
            _, states = self.get_state_space(column)
            drawn[column] = drawn[column].apply(lambda index: states[index])

        return drawn
Example #7
0
    def __init__(self, model, actions, py_func):
        """Set up observational and post-action models, samplers and queries.

        model is a pgmpy.BayesianModel
        actions is a list of (var,value) tuples
        py_func is stored unchanged on the instance.
        """
        self.py_func = py_func
        self.parents = sorted(model.get_parents('Y'))
        self.N = len(self.parents)
        self.actions = actions
        self.K = len(actions)

        self.observational_model = model
        self.observational_inference = VariableElimination(
            self.observational_model)

        # One intervened model, and one sampler on it, per action.
        self.post_action_models = [GeneralModel.do(model, a) for a in actions]
        self.samplers = [
            BayesianModelSampling(post_model)
            for post_model in self.post_action_models
        ]

        # Distribution over the parents of 'Y' under each intervention; the
        # first element returned by query() is discarded.
        self.interventional_distributions = []
        for post_model in self.post_action_models:
            _, parent_distribution = VariableElimination(post_model).query(
                self.parents)
            self.interventional_distributions.append(parent_distribution)

        self.pre_compute()
Example #8
0
    def infer(self, evidence, new_evidence):
        """Reduce the model by the merged evidence and set up inference on it.

        Parameters
        ----------
        evidence : dict
            Known evidence. It is updated in place with ``new_evidence``.
        new_evidence : dict
            Additional evidence merged into ``evidence``.

        Returns
        -------
        The reduced model returned by ``self.reduce_model``.

        Raises
        ------
        ValueError
            If ``self.inference_type`` is none of the supported types.

        Side effects: sets ``self.evidence``, ``self.inference`` and
        ``self.scope``.
        """
        evidence.update(new_evidence)

        new_model, additional_evidence = self.reduce_model(evidence)

        if self.inference_type == InferenceType.BeliefPropagation:
            inference = BeliefPropagation(new_model)
        elif self.inference_type == InferenceType.GibbsSampling:
            inference = GibbsSampling(new_model)
        elif self.inference_type == InferenceType.BayesianModelSampler:
            inference = BayesianModelSampling(new_model)
        else:
            # Without this branch an unknown type left ``inference`` unbound
            # and failed further down with an UnboundLocalError.
            raise ValueError(
                "Unsupported inference type: {!r}".format(self.inference_type))

        # Keys containing "F(" are dropped from the stored evidence.
        self.evidence = {
            var: val
            for (var, val) in evidence.items() if "F(" not in var
        }
        self.evidence.update(additional_evidence)
        self.inference = inference
        self.scope = get_scope(new_model)

        return new_model
Example #9
0
    def sample(self, n_samples=1):
        """
        Sample n data points from the Bayesian Network

        The global NumPy random state is seeded with ``self.random_state``
        before sampling.

        :param n_samples: int, amount of datapoints to generate.
        :return: Dataframe of new datapoints shape (n_samples,n_features)
        """
        np.random.seed(self.random_state)

        sampler = BayesianModelSampling(self.model)
        drawn = sampler.forward_sample(size=n_samples,
                                       return_type='dataframe')
        ordered = drawn[sorted(drawn.columns)]

        # NOTE(review): ``cols`` is not defined in this method; it must come
        # from module scope, otherwise this line raises NameError -- confirm.
        return ordered[cols]
Example #10
0
 def setUp(self):
     """Create the fixture network, its CPDs, a sampler and a Markov model."""
     self.bayesian_model = BayesianModel([
         ('A', 'J'), ('R', 'J'), ('J', 'Q'), ('J', 'L'), ('G', 'L'),
     ])

     # Nodes without parents.
     cpd_a = TabularCPD('A', 2, [[0.2], [0.8]])
     cpd_g = TabularCPD('G', 2, [[0.6], [0.4]])
     cpd_r = TabularCPD('R', 2, [[0.4], [0.6]])

     # Nodes conditioned on their parents.
     cpd_j = TabularCPD('J', 2,
                        [[0.9, 0.6, 0.7, 0.1], [0.1, 0.4, 0.3, 0.9]],
                        evidence=['R', 'A'], evidence_card=[2, 2])
     cpd_l = TabularCPD('L', 2,
                        [[0.9, 0.45, 0.8, 0.1], [0.1, 0.55, 0.2, 0.9]],
                        evidence=['G', 'J'], evidence_card=[2, 2])
     cpd_q = TabularCPD('Q', 2, [[0.9, 0.2], [0.1, 0.8]],
                        evidence=['J'], evidence_card=[2])

     self.bayesian_model.add_cpds(cpd_a, cpd_g, cpd_j, cpd_l, cpd_q, cpd_r)
     self.sampling_inference = BayesianModelSampling(self.bayesian_model)
     self.markov_model = MarkovModel()
 def setUp(self):
     """Create the fixture network, its CPDs, a sampler and a Markov model."""
     edges = [("A", "J"), ("R", "J"), ("J", "Q"), ("J", "L"), ("G", "L")]
     self.bayesian_model = BayesianModel(edges)

     # Nodes without parents.
     cpd_a = TabularCPD("A", 2, [[0.2], [0.8]])
     cpd_g = TabularCPD("G", 2, [[0.6], [0.4]])
     cpd_r = TabularCPD("R", 2, [[0.4], [0.6]])

     # Nodes conditioned on their parents.
     cpd_j = TabularCPD("J", 2,
                        [[0.9, 0.6, 0.7, 0.1], [0.1, 0.4, 0.3, 0.9]],
                        evidence=["R", "A"], evidence_card=[2, 2])
     cpd_l = TabularCPD("L", 2,
                        [[0.9, 0.45, 0.8, 0.1], [0.1, 0.55, 0.2, 0.9]],
                        evidence=["G", "J"], evidence_card=[2, 2])
     cpd_q = TabularCPD("Q", 2, [[0.9, 0.2], [0.1, 0.8]],
                        evidence=["J"], evidence_card=[2])

     self.bayesian_model.add_cpds(cpd_a, cpd_g, cpd_j, cpd_l, cpd_q, cpd_r)
     self.sampling_inference = BayesianModelSampling(self.bayesian_model)
     self.markov_model = MarkovModel()
Example #12
0
def rejection_estimate(n):
    """Plot the running rejection-sampling estimate of ``Flu == 0``.

    ``n`` samples are drawn given Fatigue, Fever and FluShot all in state 0.
    After each sample the fraction of samples so far with ``Flu == 0`` is
    recorded, and the whole curve is plotted against the number of samples.

    Parameters
    ----------
    n : int
        Number of samples to draw.
    """
    inferences = BayesianModelSampling(disease_model)
    evidences = [
        State(var='Fatigue', state=0),
        State(var='Fever', state=0),
        State(var='FluShot', state=0)
    ]

    p = inferences.rejection_sample(evidences, n)

    flu = p['Flu']
    hits = 0
    sample_counts = []
    estimates = []
    for t in range(n):
        if flu[t] == 0:
            hits += 1
        # Divide by the samples seen so far. Dividing by the final ``n``
        # (as before) gives a curve that only ever rises towards the final
        # value rather than an estimate that evolves.
        sample_counts.append(t + 1)
        estimates.append(hits / float(t + 1))

    # One plot call for all points instead of one call per sample.
    plt.plot(sample_counts, estimates, 'bo')
    plt.ylabel('Evolving estimate')
    plt.xlabel('Number of samples')
    plt.show()
Example #13
0
    def getDataset(self, size=1000, return_type='DataFrame'):
        """
        Return a set of samples generated from the Bayesian Network using
        forward sampling.

        Parameters
        ----------
        size: size of the dataset to be generated (default: 1000)

        return_type: return type of the dataset, passed on to
            ``forward_sample`` (default: 'DataFrame')

        """
        # Alternatives: likelihood-weighted, rejection or Gibbs sampling.
        from pgmpy.sampling import BayesianModelSampling

        sampler = BayesianModelSampling(self.__covid_model)
        return sampler.forward_sample(size=size, return_type=return_type)
Example #14
0
def sampling(DAG, n=1000, verbose=3):
    """Forward-sample from the joint distribution of a Bayesian network.

    Parameters
    ----------
    DAG : dict
        Holds the network under the key ``'model'``.
    n : int, optional
        Number of samples to generate. The default is 1000.
    verbose : int, optional
        Print progress to screen. The default is 3.
        0: None, 1: ERROR, 2: WARN, 3: INFO (default), 4: DEBUG, 5: TRACE

    Returns
    -------
    df : pd.DataFrame() or None
        Sampled data from the input model. ``None`` is returned, after
        printing a hint, when the model has no CPDs.

    Raises
    ------
    ValueError
        If ``n`` is not positive, or the type name of ``DAG['model']`` does
        not contain ``BayesianModel``.

    Example
    -------
    >>> import bnlearn
    >>> DAG = bnlearn.import_DAG('sprinkler')
    >>> df = bnlearn.sampling(DAG, n=1000)

    """
    if n <= 0:
        raise ValueError('n must be 1 or larger')

    network = DAG['model']
    if 'BayesianModel' not in str(type(network)):
        raise ValueError('DAG must contain BayesianModel.')

    if verbose >= 3:
        print('[bnlearn] >Forward sampling for %.0d samples..' % (n))

    # A structure without CPDs cannot be sampled from.
    if len(network.get_cpds()) == 0:
        print(
            '[bnlearn] >This seems like a DAG containing only edges, and no CPDs. Tip: use bn.parameter_learning.fit(DAG, df) to learn the CPDs first.'
        )
        return

    # Forward sampling, see http://pgmpy.org/sampling.html
    sampler = BayesianModelSampling(network)
    return sampler.forward_sample(size=n, return_type='dataframe')
Example #15
0
def sample(N):
    """Build a fixed five-node network and forward-sample ``N`` rows from it.

    Returns
    -------
    tuple
        ``(data, model)``: the forward samples and the network they were
        drawn from.
    """
    edges = [('D', 'G'), ('I', 'G'), ('E', 'L'), ('G', 'L')]
    bn_generate = BayesianModel(edges)

    # Nodes without parents.
    cpd_d = TabularCPD('D', 2, [[0.6], [0.4]])
    cpd_i = TabularCPD('I', 2, [[0.7], [0.3]])
    cpd_e = TabularCPD('E', 2, [[0.5], [0.5]])

    # G has three states and depends on D and I.
    g_values = [
        [0.3, 0.9, 0.05, 0.5],
        [0.4, 0.08, 0.25, 0.3],
        [0.3, 0.02, 0.7, 0.2],
    ]
    cpd_g = TabularCPD('G', 3, g_values,
                       evidence=['D', 'I'], evidence_card=[2, 2])

    # L has two states and depends on G and E.
    l_values = [
        [0.1, 0.3, 0.4, 0.25, 0.8, 0.99],
        [0.9, 0.7, 0.6, 0.75, 0.2, 0.01],
    ]
    cpd_l = TabularCPD('L', 2, l_values,
                       evidence=['G', 'E'], evidence_card=[3, 2])

    bn_generate.add_cpds(cpd_d, cpd_i, cpd_g, cpd_e, cpd_l)

    data = BayesianModelSampling(bn_generate).forward_sample(N)
    return data, bn_generate
Example #16
0
def sampling(model, n=1000, verbose=3):
    """Draw forward samples from the network stored in ``model``.

    Parameters
    ----------
    model : dict
        Holds the network under the key ``'model'``; the type name of that
        object has to contain ``BayesianModel``.
    n : int, optional
        Number of samples to generate. Must be positive. The default is 1000.
    verbose : int, optional
        Print progress to screen. The default is 3.
        0: NONE
        1: ERROR
        2: WARNING
        3: INFO (default)
        4: DEBUG
        5: TRACE

    Returns
    -------
    df : pd.DataFrame().


    Example
    -------
    >>> import bnlearn
    >>> model = bnlearn.import_DAG('sprinkler')
    >>> df = bnlearn.sampling(model, n=1000)

    """
    assert n > 0, 'n must be 1 or larger'
    network = model['model']
    assert 'BayesianModel' in str(
        type(network)
    ), 'Model must contain DAG from BayesianModel. Note that <misarables> example does not include DAG.'

    if verbose >= 3:
        print('[BNLEARN][sampling] Forward sampling for %.0d samples..' % (n))

    # Forward sampling, see http://pgmpy.org/sampling.html
    sampler = BayesianModelSampling(network)
    return sampler.forward_sample(size=n, return_type='dataframe')
def sample_slots(model_info_file, mr_slot_names):
    """Sample a non-empty list of slot names from a stored model.

    Slots listed in the model info but absent from ``mr_slot_names`` are
    fixed to state 0 as evidence. Rejection sampling is repeated until at
    least one variable in the sample has the value 1.

    Parameters
    ----------
    model_info_file
        Passed to ``helpers.load_from_pickle``; the loaded object must
        provide the keys ``'model'`` and ``'all_slots'``.
        NOTE(review): presumably a pickle file -- only load trusted files.
    mr_slot_names
        Slot names that are allowed to appear.

    Returns
    -------
    list
        Names of the sampled variables whose value is 1.
    """
    model_info = helpers.load_from_pickle(model_info_file)
    # Build the sampler once; it was previously constructed twice.
    inference = BayesianModelSampling(model_info['model'])

    # use the missing mr slots as evidence
    all_slots = model_info['all_slots']
    missing_slots = [mr for mr in all_slots if mr not in mr_slot_names]
    evidence = [State(mr, 0) for mr in missing_slots]

    # don't allow empty samples
    # NOTE(review): this never terminates if the evidence rules out every
    # variable taking the value 1 -- confirm callers cannot trigger that.
    sampled_slots = []
    while not sampled_slots:
        sample = inference.rejection_sample(evidence=evidence,
                                            size=1,
                                            return_type='recarray')
        # return a list of the column names which had presence
        sampled_slots = [
            name for var, name in zip(sample.view('<i8'), sample.dtype.names)
            if var == 1
        ]
    return sampled_slots
Example #18
0
def sample_dag(dag, num):
    """Forward-sample ``num`` records from a graph with per-node CPD tables.

    Parameters
    ----------
    dag
        Graph whose nodes each carry a ``'cpd'`` attribute; the length of
        that attribute is used as the node's cardinality.
    num : int
        Number of samples to draw.

    Returns
    -------
    The result of ``forward_sample`` requested with
    ``return_type='recarray'``.
    """
    # Add nodes explicitly before the edges: building the model from the
    # edge list alone loses disconnected nodes.
    bayesmod = BayesianModel()
    bayesmod.add_nodes_from(dag.nodes())
    bayesmod.add_edges_from(dag.edges())

    # ``dag.node`` was removed in networkx 2.4; fall back to ``dag.nodes``
    # there, while keeping ``dag.node`` for older releases.
    node_attrs = getattr(dag, 'node', None)
    if node_attrs is None:
        node_attrs = dag.nodes

    tab_cpds = []
    cards = {node: len(node_attrs[node]['cpd']) for node in dag.nodes()}
    for node in dag.nodes():
        # Materialize the parents: on networkx >= 2 predecessors() returns
        # an iterator, which is always truthy and cannot be reused.
        parents = list(dag.predecessors(node))
        cpd = node_attrs[node]['cpd']
        if parents:
            parent_cards = [cards[par] for par in parents]
            logging.debug("TablularCPD({}, {}, {}, {}, {})".format(
                node, cards[node], cpd, parents, parent_cards))
            tab_cpds.append(
                TabularCPD(node, cards[node], cpd, parents, parent_cards))
        else:
            logging.debug("TablularCPD({}, {}, {})".format(
                node, cards[node], cpd))
            tab_cpds.append(TabularCPD(node, cards[node], cpd))

    logging.debug("cpds add: {}".format(tab_cpds))

    # Single-argument print() behaves the same on Python 2 and Python 3;
    # the former print statements were a syntax error on Python 3.
    print("model variables: {}".format(bayesmod.nodes()))
    for tab_cpd in tab_cpds:
        print("cpd variables: {}".format(tab_cpd.variables))

    bayesmod.add_cpds(*tab_cpds)

    logging.debug("cpds get: {}".format(bayesmod.get_cpds()))
    inference = BayesianModelSampling(bayesmod)

    logging.debug("generating data")
    recs = inference.forward_sample(size=num, return_type='recarray')
    return recs
class DynamicBayesianNetwork(Process):
    """Process that builds a network from its parameters and samples from it.

    ``ports_schema`` and ``next_update`` currently return empty dicts.
    """

    # Default parameters: no nodes or edges, and a placeholder mapping of
    # node id -> probability values.
    defaults = {
        'nodes': [],
        'edges': [],
        'conditional_probabilities': {
            'node_id': []
        }
    }

    def __init__(self, parameters=None):
        """Build the network, attach its CPDs and create a forward sampler.

        Parameters
        ----------
        parameters : dict, optional
            Forwarded to the parent initializer. Afterwards
            ``self.parameters`` is read for ``'nodes'``, ``'edges'`` and
            ``'conditional_probabilities'``.
        """
        super().__init__(parameters)

        # set up the network based on the parameters
        self.model = DBN()
        self.model.add_nodes_from(self.parameters['nodes'])
        self.model.add_edges_from(self.parameters['edges'])

        print(f'EDGES: {sorted(self.model.edges())}')

        # Accept a mapping of node_id -> values (the shape used in
        # ``defaults``) as well as an iterable of (node_id, values) pairs.
        # Iterating the mapping directly would yield only its keys.
        probabilities = self.parameters['conditional_probabilities']
        if hasattr(probabilities, 'items'):
            probabilities = probabilities.items()

        # TODO -- add 'evidence' -- get from network?
        cpds = [
            TabularCPD(variable=node_id,
                       variable_card=len(values),
                       values=values,
                       evidence=[])
            for node_id, values in probabilities
        ]
        # add_cpds takes the CPDs as separate positional arguments.
        self.model.add_cpds(*cpds)

        # make an inference instance for sampling the model
        self.inference = BayesianModelSampling(self.model)

        # Draw a small sample right away; the result itself is not used.
        self.inference.forward_sample(size=2)

    def ports_schema(self):
        """Return the ports schema; currently empty."""
        return {}

    def next_update(self, timestep, states):
        """Return the update for this timestep; currently empty."""
        return {}
 def setUp(self):
     """Create the fixture network, its CPDs, a sampler and a Markov model."""
     edges = [('A', 'J'), ('R', 'J'), ('J', 'Q'), ('J', 'L'), ('G', 'L')]
     self.bayesian_model = BayesianModel(edges)

     # Nodes without parents.
     cpd_a = TabularCPD('A', 2, [[0.2], [0.8]])
     cpd_g = TabularCPD('G', 2, [[0.6], [0.4]])
     cpd_r = TabularCPD('R', 2, [[0.4], [0.6]])

     # Nodes conditioned on their parents.
     j_values = [[0.9, 0.6, 0.7, 0.1],
                 [0.1, 0.4, 0.3, 0.9]]
     cpd_j = TabularCPD('J', 2, j_values,
                        evidence=['R', 'A'], evidence_card=[2, 2])
     l_values = [[0.9, 0.45, 0.8, 0.1],
                 [0.1, 0.55, 0.2, 0.9]]
     cpd_l = TabularCPD('L', 2, l_values,
                        evidence=['G', 'J'], evidence_card=[2, 2])
     q_values = [[0.9, 0.2],
                 [0.1, 0.8]]
     cpd_q = TabularCPD('Q', 2, q_values,
                        evidence=['J'], evidence_card=[2])

     self.bayesian_model.add_cpds(cpd_a, cpd_g, cpd_j, cpd_l, cpd_q, cpd_r)
     self.sampling_inference = BayesianModelSampling(self.bayesian_model)
     self.markov_model = MarkovModel()
Example #21
0
# CPD of the two-state variable E given its single two-state parent M.
cpd_e = TabularCPD(variable='E',
                   variable_card=2,
                   values=[[0.95, 0.2], [0.05, 0.8]],
                   evidence=['M'],
                   evidence_card=[2])

# Associating the CPDs with the network
model.add_cpds(cpd_d, cpd_m, cpd_r, cpd_l, cpd_e)

# check_model checks for the network structure and CPDs and verifies that the CPDs are correctly
# defined and sum to 1.
model.check_model()

# Forward_sample then iterate and count strong musician/ good letter/both
inference = BayesianModelSampling(model)
numSamples = 10000
samples = inference.forward_sample(size=numSamples, return_type='recarray')

# Counters filled in while scanning the samples below.
part1 = 0
strongLetter = 0
weakMusician = 0
strongLetterWeakMuscician = 0

# Samples have structure (M E D R L)
# NOTE(review): the positional indexing below relies on that field order --
# confirm it matches the order of the returned recarray.
for sample in samples:
    # P(m = strong)P(d = low)P(r = ∗ ∗ |m = strong, d = low)P(e = high|m = strong)P(letter = weak| ∗ ∗)
    if sample[0] and not sample[2] and sample[3] == 2 and sample[
            1] and not sample[4]:
        part1 += 1
    # P(letter = strong)
Example #22
0
# Associating the CPDs with the network
pg_model.add_cpds(cpd_parent_edu, cpd_screentime, cpd_physical, cpd_obesity,
                  cpd_self_harm)

# check_model checks for the network structure and CPDs and verifies that the CPDs are correctly
# defined and sum to 1.
pg_model.check_model()

# examine conditional independence relationships:
# (the results are not stored or printed; they are only displayed when this
# runs interactively, e.g. in a notebook cell)
pg_model.local_independencies("parent_education")
pg_model.local_independencies("child_obesity")
pg_model.local_independencies("child_screen_time")
pg_model.local_independencies("child_physical_activity")

# sample data from the network:
inference = BayesianModelSampling(pg_model)
sim_n = 50_000
simulated_sample = inference.forward_sample(size=sim_n)
# recode every column as 1 where the sampled value is "high", else 0:
for colname_j in simulated_sample.columns:
    simulated_sample[colname_j] = (
        simulated_sample[colname_j] == "high").astype(int)

# draw correlation plot of the variables:
# NOTE(review): Styler.set_precision was removed in pandas 2.0
# (replacement: .format(precision=2)) -- confirm the pandas version in use.
corr_mat = simulated_sample.corr()
corr_mat.style.background_gradient(cmap="coolwarm").set_precision(2)

# example: if we condition on "child_screen_time"..
# ..then "child_physical_activity" becomes independent of "parent_education":
corr_mat = simulated_sample.query("child_screen_time==1").drop(
    "child_screen_time", axis=1).corr()
corr_mat.style.background_gradient(cmap="coolwarm").set_precision(2)
Example #23
0
        [0.1, 0.2, 1, 1, 0.8, 0.9, 1, 1]
    ],  #p(~G)
    evidence=[
        'BrokeElectionLaw', 'PoliticallyMotivatedProsecutor', 'Indicted'
    ],
    evidence_card=[2, 2, 2])

# CPD of the two-state variable 'Jailed' given its parent 'FoundGuilty'.
cpd_j = TabularCPD(variable='Jailed',
                   variable_card=2,
                   values=[[0.9, 0.0], [0.1, 1.0]],
                   evidence=['FoundGuilty'],
                   evidence_card=[2])

# Associate the CPDs with the model
election_model.add_cpds(cpd_b, cpd_i, cpd_m, cpd_g, cpd_j)

# Check the independencies
print(election_model.get_independencies())

# Draw 100,000 forward samples to use as training data below.
samples = BayesianModelSampling(election_model).forward_sample(size=int(1e5))
# The result of head() is not printed or stored.
samples.head()

# Show the estimates
mle = MaximumLikelihoodEstimator(model=election_model, data=samples)
print("\nEstimating the CPD for a single node.\n")
print(mle.estimate_cpd(node='BrokeElectionLaw'))
print(mle.estimate_cpd(node='PoliticallyMotivatedProsecutor'))
print(mle.estimate_cpd(node='Indicted'))
print(mle.estimate_cpd(node='FoundGuilty'))
print(mle.estimate_cpd(node='Jailed'))
Example #24
0
# Network structure: Bedrooms depends on IncomeQ and HhSize; RentQ depends on
# IncomeQ and Bedrooms.
model = BayesianModel([('IncomeQ', 'Bedrooms'), ('HhSize', 'Bedrooms'),
                       ('IncomeQ', 'RentQ'), ('Bedrooms', 'RentQ')])
#nx.draw_networkx(model, with_labels=True)

# list(...) keeps the column selection working when nodes() returns a view
# object rather than a plain list.
modelData = hh[list(model.nodes())].copy()

# First 85% of the rows for training, the remaining rows for testing.
split_at = int(0.85 * modelData.shape[0])
testData = modelData.iloc[split_at:].copy()
trainData = modelData.iloc[:split_at].copy()

model.fit(trainData, estimator=MaximumLikelihoodEstimator)
#for cpd in model.get_cpds():
#    print("CPD of {variable}:".format(variable=cpd.variable))
#    print(cpd)

model_sample = BayesianModelSampling(model)
# Use a context manager so the file is flushed and closed deterministically.
with open('results/sampler.p', 'wb') as sampler_file:
    pickle.dump(model_sample, sampler_file)

# open the nhts sample and add the inferred resType requirements
nhtsSample = pd.read_csv('results/nhtsSample.csv')
resType = []
for ind, row in nhtsSample.iterrows():
    # Shift the 1-based survey values to 0-based states, capped at the
    # largest state used here (10 for income, 5 for household size).
    evidence = [
        State('IncomeQ', min(row['hh_income'] - 1, 10)),
        State('HhSize', min(row['hh_size'] - 1, 5))
    ]
    sample = model_sample.likelihood_weighted_sample(evidence=evidence, size=1)
    # Read the single sampled row explicitly; int() on a one-element Series
    # is deprecated in recent pandas.
    bedrooms = int(sample['Bedrooms'].iloc[0])
    rent_q = int(sample['RentQ'].iloc[0])
    resType.append(bedrooms * 3 + rent_q)
nhtsSample['resType'] = resType
os.chdir('..')
nhtsSample[nhtsSample['occupation_type'] == 1].sample(
Example #25
0
# Attach all CPDs of model1 in one go.
cpd1 = [p_21, p_52, p_14, p_64, p_36, p4]
model1.add_cpds(*cpd1)

separator = "------------------------------------------"
print(separator)
print("Edges of model1:", model1.edges())
print("Checking Model1:", model1.check_model())
print(separator)

# Generate data for model1 and score the model on it.
inference = BayesianModelSampling(model1)
data = inference.forward_sample(size=3000, return_type='dataframe')
print("Data for model1:")
print(data)
k2 = K2Score(data)
print('Model1 K2 Score: ' + str(k2.score(model1)))

# Inference
from pgmpy.inference import VariableElimination
infer = VariableElimination(model1)

print("Inference of x3:")
x3_result = infer.query(['x3'])
print(x3_result['x3'])

print("Inference of x5|x2:")
x5_result = infer.query(['x5'], evidence={'x2': 1})
print(x5_result['x5'])

Example #26
0
def task3():
	"""Time sampling from the best 'th' and 'and' models and print a report.

	For each of the two best Bayesian models (``task2_best_bm`` for the 'th'
	data, ``task4_best_bm`` for the 'and' data) this times forward sampling
	from the Bayesian model and Gibbs sampling from its Markov-model
	conversion. For the 'th' Markov model it also predicts ``x1`` with a MAP
	query on each Gibbs sample and reports the percentage of matches.

	Side effects: rebinds the globals ``task2_best_mm`` and ``task4_best_mm``
	and prints the report. Accuracy for the Bayesian models and for the
	'and' Markov model is not computed.
	"""
	global task4_best_bm, task2_best_bm, task2_best_mm, task4_best_mm
	n_samples = 1000

	# --- 'th' data: Bayesian model ---
	st1 = time.time()
	task2_best_bm_samples = BayesianModelSampling(task2_best_bm).forward_sample(size=n_samples)
	diff1 = time.time() - st1

	# Predict x1 from the remaining columns. The prediction is not scored;
	# the call is kept so that a failure here still surfaces as before.
	task2_best_bm_samplesC = task2_best_bm_samples.drop('x1', axis=1)
	task2_best_bm.predict(task2_best_bm_samplesC)

	# --- 'th' data: Markov model ---
	task2_best_mm = task2_best_bm.to_markov_model()
	st2 = time.time()
	task2_best_mm_samples = GibbsSampling(task2_best_mm).sample(size=n_samples)
	diff2 = time.time() - st2

	task2_mmprop = BeliefPropagation(task2_best_mm)
	task2_mm_predicted = []
	# NOTE(review): the positional indices below assume a fixed column order
	# of the Gibbs samples -- confirm against the sampler's output.
	for row in task2_best_mm_samples.values:
		try:
			task2_mm_predicted.append(task2_mmprop.map_query(
				variables=['x1'],
				evidence={
					'x2': int(row[2]),
					'x3': int(row[1]),
					'x4': int(row[5]),
					'x5': int(row[0]),
					'x6': int(row[4]),
				}))
		except Exception:
			# Best effort: a failed query counts as a miss (-1 never matches
			# a sampled state) without swallowing KeyboardInterrupt/SystemExit.
			task2_mm_predicted.append({'x1': -1})

	# ``.values`` replaces DataFrame.as_matrix(), removed in pandas 1.0.
	actual_x1 = task2_best_mm_samples['x1'].values
	hits = sum(
		1 for predicted, actual in zip(task2_mm_predicted, actual_x1)
		if predicted['x1'] == int(actual))
	task2_mm_acc = 100.0 * hits / n_samples

	print("\tBayesian Model for 'th' data : "+str(task2_best_bm.edges()))
	print("\tBayesian Model for 'th' data takes time : "+str(diff1))
	print("\tMarkov Model for 'th' data : "+str(task2_best_mm.edges()))
	print("\tMarkov Model for 'th' data takes time : "+str(diff2))
	print("\tMarkov Model for 'th' data has accuracy : "+str(task2_mm_acc))

	# --- 'and' data: Bayesian model (timing only) ---
	st3 = time.time()
	BayesianModelSampling(task4_best_bm).forward_sample(size=n_samples)
	diff3 = time.time() - st3

	# --- 'and' data: Markov model (timing only) ---
	task4_best_mm = task4_best_bm.to_markov_model()
	st4 = time.time()
	GibbsSampling(task4_best_mm).sample(size=n_samples)
	diff4 = time.time() - st4

	print("\tBayesian Model for 'and' data : "+str(task4_best_bm.edges()))
	print("\tBayesian Model for 'and' data takes time : "+str(diff3))
	print("\tMarkov Model for 'and' data : "+str(task4_best_mm.edges()))
	print("\tMarkov Model for 'and' data takes time : "+str(diff4))
Example #27
0
class TestBayesianModelSampling(unittest.TestCase):
    """Tests for the sampling methods of ``BayesianModelSampling``."""

    # Names of all variables in the fixture network.
    VARIABLES = ('A', 'J', 'R', 'Q', 'G', 'L')

    def setUp(self):
        """Create the fixture network, its CPDs, a sampler and a Markov model."""
        self.bayesian_model = BayesianModel([('A', 'J'), ('R', 'J'),
                                             ('J', 'Q'), ('J', 'L'),
                                             ('G', 'L')])
        cpd_a = TabularCPD('A', 2, [[0.2], [0.8]])
        cpd_r = TabularCPD('R', 2, [[0.4], [0.6]])
        cpd_j = TabularCPD('J', 2,
                           [[0.9, 0.6, 0.7, 0.1], [0.1, 0.4, 0.3, 0.9]],
                           ['R', 'A'], [2, 2])
        cpd_q = TabularCPD('Q', 2, [[0.9, 0.2], [0.1, 0.8]], ['J'], [2])
        cpd_l = TabularCPD('L', 2,
                           [[0.9, 0.45, 0.8, 0.1], [0.1, 0.55, 0.2, 0.9]],
                           ['G', 'J'], [2, 2])
        cpd_g = TabularCPD('G', 2, [[0.6], [0.4]])
        self.bayesian_model.add_cpds(cpd_a, cpd_g, cpd_j, cpd_l, cpd_q, cpd_r)
        self.sampling_inference = BayesianModelSampling(self.bayesian_model)
        self.markov_model = MarkovModel()

    def _assert_sample(self, sample, size, n_columns, allowed):
        """Check row count, column count and per-variable value ranges.

        ``allowed`` maps a variable name to the set of values its column may
        contain; variables not listed may take 0 or 1.
        """
        # assertEqual replaces the deprecated alias assertEquals, which was
        # removed in Python 3.12.
        self.assertEqual(len(sample), size)
        self.assertEqual(len(sample.columns), n_columns)
        for name in self.VARIABLES:
            self.assertIn(name, sample.columns)
        for name in self.VARIABLES:
            self.assertTrue(
                set(sample[name]).issubset(allowed.get(name, {0, 1})))

    def test_init(self):
        """A Markov model is rejected with a TypeError."""
        with self.assertRaises(TypeError):
            BayesianModelSampling(self.markov_model)

    def test_forward_sample(self):
        """Forward sampling yields all six binary variables."""
        sample = self.sampling_inference.forward_sample(25)
        self._assert_sample(sample, 25, 6, {})

    def test_rejection_sample_basic(self):
        """Rejection sampling keeps the evidence variables at their state."""
        sample = self.sampling_inference.rejection_sample(
            [State('A', 1), State('J', 1),
             State('R', 1)], 25)
        self._assert_sample(sample, 25, 6, {'A': {1}, 'J': {1}, 'R': {1}})

    @patch("pgmpy.sampling.BayesianModelSampling.forward_sample",
           autospec=True)
    def test_rejection_sample_less_arg(self, forward_sample):
        """Without evidence, rejection sampling defers to forward_sample."""
        sample = self.sampling_inference.rejection_sample(size=5)
        forward_sample.assert_called_once_with(self.sampling_inference, 5)
        self.assertEqual(sample, forward_sample.return_value)

    def test_likelihood_weighted_sample(self):
        """Likelihood weighting adds a ``_weight`` column to the variables."""
        sample = self.sampling_inference.likelihood_weighted_sample(
            [State('A', 0), State('J', 1),
             State('R', 0)], 25)
        self._assert_sample(sample, 25, 7, {})
        self.assertIn('_weight', sample.columns)

    def tearDown(self):
        """Drop the fixtures created in setUp."""
        del self.sampling_inference
        del self.bayesian_model
        del self.markov_model
Example #28
0
 def test_init(self):
     """Constructing the sampler from a Markov model raises TypeError."""
     self.assertRaises(TypeError, BayesianModelSampling, self.markov_model)
Example #29
0
def _task4_fit_k2_cpds(model, data, features):
	"""Estimate a CPD with the K2 prior for every feature and add it to ``model``."""
	estimator = BayesianEstimator(model, data)
	for feature in features:
		model.add_cpds(estimator.estimate_cpd(feature, prior_type="K2"))


def _task4_sampled_k2_score(model, n_samples=1000):
	"""Return the K2 score of ``model`` on ``n_samples`` forward samples drawn from it."""
	samples = BayesianModelSampling(model).forward_sample(size=n_samples)
	return K2Score(samples).score(model)


def task4():
	"""Fit five candidate Bayesian networks on ``andRawData`` and keep the best one.

	Model 1 is learned with ``HillClimbSearch`` (K2 scoring); models 2-5 use
	hand-written edge lists. Each candidate gets K2-prior CPDs for ``f1``..``f9``,
	is appended to the module-level ``task4_bms`` list, and is scored with
	``K2Score`` on 1000 forward samples. The edges and score of every candidate
	are printed, and the highest-scoring model is stored in the module-level
	``task4_best_bm``.

	NOTE(review): the score is computed on data sampled from the fitted model
	itself, not on ``andRawData`` -- confirm that this is the intended metric.
	"""
	global andRawData, task4_best_bm
	features = ['f1', 'f2', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9']
	data = pd.DataFrame(andRawData.values, columns=features)
	hill_climb = HillClimbSearch(data, scoring_method=K2Score(data))

	# (description, zero-argument builder) per candidate, in model-number order.
	# Builders are called lazily so each model is created right before it is fitted.
	candidates = [
		("Model through HillClimbSearch", hill_climb.estimate),
		("Manual Model based on HillClimbSearch",
			lambda: BayesianModel([('f3', 'f4'), ('f4', 'f9'), ('f3', 'f8'), ('f1', 'f7'), ('f5', 'f3'), ('f9', 'f8'), ('f1', 'f6'), ('f9', 'f1'), ('f9', 'f6'), ('f9', 'f2')])),
		("Manual Model based on HillClimbSearch",
			lambda: BayesianModel([('f3', 'f4'), ('f4', 'f9'), ('f3', 'f8'), ('f5', 'f7'), ('f5', 'f3'), ('f9', 'f8'), ('f1', 'f2'), ('f9', 'f1'), ('f9', 'f6'), ('f9', 'f2')])),
		("Manual Model based on HillClimbSearch",
			lambda: BayesianModel([('f3', 'f4'), ('f4', 'f9'), ('f5', 'f7'), ('f5', 'f3'), ('f1', 'f2'), ('f9', 'f1'), ('f9', 'f6'), ('f9', 'f8')])),
		# The fifth model is hand-designed, so its label says so instead of
		# repeating the HillClimbSearch wording of models 2-4.
		("Manual Model based on Intuition",
			lambda: BayesianModel([('f3', 'f4'), ('f4', 'f9'), ('f4', 'f7'), ('f1', 'f2'), ('f8', 'f5'), ('f9', 'f6'), ('f9', 'f8')])),
	]

	# Models evaluated by THIS call; kept separately from task4_bms so the
	# best-score index stays aligned even if task4_bms already held entries.
	evaluated = []
	k2_scores = []
	for number, (description, build_model) in enumerate(candidates, start=1):
		prefix = "\tModel " + str(number) + ": "
		model = build_model()
		_task4_fit_k2_cpds(model, data, features)
		task4_bms.append(model)
		evaluated.append(model)
		print(prefix + description + " is : " + str(model.edges()))
		score = _task4_sampled_k2_score(model)
		k2_scores.append(score)
		print(prefix + "K2 Accuracy Score is " + str(score))

	# index() returns the first maximum, so ties resolve to the lowest model number.
	best_index = k2_scores.index(max(k2_scores))
	task4_best_bm = evaluated[best_index]
	print("\tBest Bayesian Model with the highest accuracy score is thus Model " + str(best_index + 1))
class TestBayesianModelSampling(unittest.TestCase):
    def setUp(self):
        self.bayesian_model = BayesianModel([('A', 'J'), ('R', 'J'), ('J', 'Q'),
                                             ('J', 'L'), ('G', 'L')])
        cpd_a = TabularCPD('A', 2, [[0.2], [0.8]])
        cpd_r = TabularCPD('R', 2, [[0.4], [0.6]])
        cpd_j = TabularCPD('J', 2,
                           [[0.9, 0.6, 0.7, 0.1],
                            [0.1, 0.4, 0.3, 0.9]],
                           ['R', 'A'], [2, 2])
        cpd_q = TabularCPD('Q', 2,
                           [[0.9, 0.2],
                            [0.1, 0.8]],
                           ['J'], [2])
        cpd_l = TabularCPD('L', 2,
                           [[0.9, 0.45, 0.8, 0.1],
                            [0.1, 0.55, 0.2, 0.9]],
                           ['G', 'J'], [2, 2])
        cpd_g = TabularCPD('G', 2, [[0.6], [0.4]])
        self.bayesian_model.add_cpds(cpd_a, cpd_g, cpd_j, cpd_l, cpd_q, cpd_r)
        self.sampling_inference = BayesianModelSampling(self.bayesian_model)
        self.markov_model = MarkovModel()

    def test_init(self):
        with self.assertRaises(TypeError):
            BayesianModelSampling(self.markov_model)

    def test_forward_sample(self):
        sample = self.sampling_inference.forward_sample(25)
        self.assertEquals(len(sample), 25)
        self.assertEquals(len(sample.columns), 6)
        self.assertIn('A', sample.columns)
        self.assertIn('J', sample.columns)
        self.assertIn('R', sample.columns)
        self.assertIn('Q', sample.columns)
        self.assertIn('G', sample.columns)
        self.assertIn('L', sample.columns)
        self.assertTrue(set(sample.A).issubset({0, 1}))
        self.assertTrue(set(sample.J).issubset({0, 1}))
        self.assertTrue(set(sample.R).issubset({0, 1}))
        self.assertTrue(set(sample.Q).issubset({0, 1}))
        self.assertTrue(set(sample.G).issubset({0, 1}))
        self.assertTrue(set(sample.L).issubset({0, 1}))

    def test_rejection_sample_basic(self):
        sample = self.sampling_inference.rejection_sample([State('A', 1), State('J', 1), State('R', 1)], 25)
        self.assertEquals(len(sample), 25)
        self.assertEquals(len(sample.columns), 6)
        self.assertIn('A', sample.columns)
        self.assertIn('J', sample.columns)
        self.assertIn('R', sample.columns)
        self.assertIn('Q', sample.columns)
        self.assertIn('G', sample.columns)
        self.assertIn('L', sample.columns)
        self.assertTrue(set(sample.A).issubset({1}))
        self.assertTrue(set(sample.J).issubset({1}))
        self.assertTrue(set(sample.R).issubset({1}))
        self.assertTrue(set(sample.Q).issubset({0, 1}))
        self.assertTrue(set(sample.G).issubset({0, 1}))
        self.assertTrue(set(sample.L).issubset({0, 1}))

    @patch("pgmpy.sampling.BayesianModelSampling.forward_sample", autospec=True)
    def test_rejection_sample_less_arg(self, forward_sample):
        sample = self.sampling_inference.rejection_sample(size=5)
        forward_sample.assert_called_once_with(self.sampling_inference, 5)
        self.assertEqual(sample, forward_sample.return_value)

    def test_likelihood_weighted_sample(self):
        sample = self.sampling_inference.likelihood_weighted_sample([State('A', 0), State('J', 1), State('R', 0)], 25)
        self.assertEquals(len(sample), 25)
        self.assertEquals(len(sample.columns), 7)
        self.assertIn('A', sample.columns)
        self.assertIn('J', sample.columns)
        self.assertIn('R', sample.columns)
        self.assertIn('Q', sample.columns)
        self.assertIn('G', sample.columns)
        self.assertIn('L', sample.columns)
        self.assertIn('_weight', sample.columns)
        self.assertTrue(set(sample.A).issubset({0, 1}))
        self.assertTrue(set(sample.J).issubset({0, 1}))
        self.assertTrue(set(sample.R).issubset({0, 1}))
        self.assertTrue(set(sample.Q).issubset({0, 1}))
        self.assertTrue(set(sample.G).issubset({0, 1}))
        self.assertTrue(set(sample.L).issubset({0, 1}))

    def tearDown(self):
        del self.sampling_inference
        del self.bayesian_model
        del self.markov_model
def bayesian_net():
    """Run the musicianship demo: exact queries, then queries on a re-learned model.

    Steps, all reported on stdout:

    1. Build a five-node network (Difficulty, Musicianship, Rating, Exam,
       Letter) with hand-written CPDs and validate it with ``check_model``.
    2. Query ``Letter`` through ``SimpleInference`` (described in the original
       code as "query without normalization") and through
       ``VariableElimination``.
    3. Draw 2000 likelihood-weighted samples with empty evidence, fit a second
       network with the same edges on them using ``BayesianEstimator``, and
       print its CPDs.
    4. Repeat the ``VariableElimination`` queries on the re-learned network.

    Returns nothing.
    """
    thick_rule = '------------------------'
    thin_rule = '--------------------'
    full_evidence_title = (
        ' QUERY Letter with evidence Difficulty: 0, Musicianship: 1, Rating: 1, Exam:1  NORMALIZED'
    )
    full_evidence = (('Difficulty', 0), ('Musicianship', 1), ('Rating', 1),
                     ('Exam', 1))

    def banner(title):
        # Section header framed by the long rule.
        for line in (thick_rule, title, thick_rule):
            print(line)

    def heading(title):
        # Query header framed by the short rule.
        for line in (thin_rule, title, thin_rule):
            print(line)

    def show_full_evidence_query(engine):
        # A fresh evidence dict is built for every call.
        print(engine.query(['Letter'], evidence=dict(full_evidence))['Letter'])

    def show_marginal_queries(engine):
        # Letter with no evidence, then Letter given Musicianship == 0.
        heading(' QUERY Letter with no evidence')
        print(engine.query(['Letter'])['Letter'])
        heading(' QUERY Letter with evidence Musicianship: 0  NORMALIZED')
        print(engine.query(['Letter'], evidence={'Musicianship': 0})['Letter'])

    musicianship_model = BayesianModel([
        ('Difficulty', 'Rating'),
        ('Musicianship', 'Rating'),
        ('Musicianship', 'Exam'),
        ('Rating', 'Letter'),
    ])
    musicianship_model.add_cpds(
        # Difficulty: 0 -> Low, 1 -> High
        TabularCPD(variable='Difficulty', variable_card=2,
                   values=[[0.6], [0.4]]),
        # Musicianship: 0 -> Weak, 1 -> Strong
        TabularCPD(variable='Musicianship', variable_card=2,
                   values=[[0.7], [0.3]]),
        # Rating: 0 -> *, 1 -> **, 2 -> ***
        TabularCPD(variable='Rating', variable_card=3,
                   values=[[0.3, 0.05, 0.9, 0.5],
                           [0.4, 0.25, 0.08, 0.3],
                           [0.3, 0.7, 0.02, 0.2]],
                   evidence=['Difficulty', 'Musicianship'],
                   evidence_card=[2, 2]),
        # Exam: 0 -> Low, 1 -> High
        TabularCPD(variable='Exam', variable_card=2,
                   values=[[0.95, 0.2], [0.05, 0.8]],
                   evidence=['Musicianship'],
                   evidence_card=[2]),
        # Letter: 0 -> Weak, 1 -> Strong
        TabularCPD(variable='Letter', variable_card=2,
                   values=[[0.1, 0.4, 0.99], [0.9, 0.6, 0.01]],
                   evidence=['Rating'],
                   evidence_card=[3]),
    )
    musicianship_model.check_model()

    unnormalized_engine = SimpleInference(musicianship_model)

    banner(' EXACT INFERENCE')
    heading(
        ' QUERY Letter with evidence Difficulty: 0, Musicianship: 1, Rating: 1, Exam:1  NOT NORMALIZED'
    )
    # SimpleInference receives its evidence as a set of (variable, state) pairs.
    print(
        unnormalized_engine.query(['Letter'],
                                  evidence={('Difficulty', 0),
                                            ('Musicianship', 1),
                                            ('Rating', 1), ('Exam', 1)}))

    heading(full_evidence_title)
    exact_engine = VariableElimination(musicianship_model)
    show_full_evidence_query(exact_engine)
    show_marginal_queries(exact_engine)

    # Sample from the hand-built network and learn a second one from the samples.
    sampler = BayesianModelSampling(musicianship_model)
    samples = sampler.likelihood_weighted_sample(evidence={},
                                                 size=2000,
                                                 return_type='dataframe')

    musicianship_model_bis = BayesianModel([
        ('Difficulty', 'Rating'),
        ('Musicianship', 'Rating'),
        ('Rating', 'Letter'),
        ('Musicianship', 'Exam'),
    ])
    musicianship_model_bis.fit(samples, estimator=BayesianEstimator)
    musicianship_model_bis.check_model()
    learned_engine = VariableElimination(musicianship_model_bis)
    for learned_cpd in musicianship_model_bis.get_cpds():
        print("CPD of {0}:".format(learned_cpd.variable))
        print(learned_cpd)

    banner(' APPROXIMATE INFERENCE')
    heading(full_evidence_title)
    show_full_evidence_query(learned_engine)
    show_marginal_queries(learned_engine)
    belpro.map_query(variables=['attendance'],
                     evidence={
                         'difficulty': 2,
                         'Q9': 3
                     }))
# print(belpro.map_query(variables=['Q25', 'Q18','Q16'],evidence={'instr':1}))
# Print the result of a map_query over attendance, Q9 and difficulty with the
# evidence class == 7.
# NOTE(review): belpro is created earlier in the script (not visible here);
# presumably a belief-propagation inference object -- confirm.
print(
    belpro.map_query(variables=['attendance', 'Q9', 'difficulty'],
                     evidence={'class': 7}))

# The queries below are commented out because they take a long time to run.

# print(belpro.map_query(variables=['Q28','Q11'],evidence={'instr':2, 'class':10}))
# print(belpro.map_query(variables=['Q18', 'Q26','Q13'],evidence={'instr':2}))
# print(belpro.map_query(variables=['Q23', 'Q21','Q17'],evidence={'instr':2}))
# Draw five forward samples from the network and print them in several forms.
inference = BayesianModelSampling(bayesmodel)

df = inference.forward_sample(5)
# print(df.shape)
# Single-argument print(...) calls produce the same output on Python 2 and 3.
print(df)
print(np.mean(df))
# print(scipy.stats.entropy(df))

# DataFrame.as_matrix() was removed in pandas 1.0; .values returns the same
# underlying ndarray and is available in old and new pandas alike.
dataarray = df.values
print(dataarray)
arr = dataarray.astype(float)
print(arr)
# Accumulators initialised here for the code that follows this section.
sum1 = []
total = 0
count = 0