def main():
    """
    Run the configured sampling algorithm ten times and report the mean and
    standard deviation of the relative log-space error and of the wall-clock
    time per run.

    Reads module-level globals: type_of_algorithm, pr_file_name,
    uai_file_name, evidence_file_name, w_cutset, num_samples.
    :return: None
    """
    log_actual_pr = log10(get_pr.get_pr(pr_file_name))
    # Both branches ran identical benchmarking code; only the sampler differs.
    if type_of_algorithm == "-vec":
        _benchmark_sampler(sampling_VE.sampling_VE, log_actual_pr)
    elif type_of_algorithm == "-avec":
        _benchmark_sampler(
            sampling_VE_adaptive_proposal_distribution.sampling_VE,
            log_actual_pr)
    else:
        print("Please give correct algorithm name")


def _benchmark_sampler(sampler, log_actual_pr, num_runs=10):
    """
    Run *sampler* num_runs times with seeds 0..num_runs-1 and print the
    mean ± std of the relative log-space error and of the elapsed time.

    @param sampler: callable with the sampling_VE(...) signature
    @param log_actual_pr: log10 of the ground-truth partition function
    @param num_runs: number of independent repetitions (default 10, as in
        the original inlined loops)
    """
    errors = []
    timer = []
    for each in range(num_runs):
        random.seed(each)
        start_time = time.time()
        # Reload and re-instantiate the model every run so the runs are
        # independent (instantiate/sampling mutate the clique structures).
        (num_of_var, cardinalities, num_of_cliques, num_of_var_in_clique,
         var_in_clique, distribution_array) = get_data_uai.get_uai_data(
            uai_file_name)
        evidence = get_evidence_data.get_evidence(evidence_file_name)
        var_in_clique, distribution_array = helper.instantiate(
            num_of_var_in_clique, evidence, cardinalities, var_in_clique,
            distribution_array)
        estimate = sampler(num_of_var, cardinalities, num_of_cliques,
                           num_of_var_in_clique, var_in_clique,
                           distribution_array, w_cutset, num_samples)
        elapsed = time.time() - start_time
        log_predicted_pr = log10(estimate)
        # Relative error measured in log10 space.
        errors.append((log_actual_pr - log_predicted_pr) / log_actual_pr)
        timer.append(elapsed)
    print("The error is", numpy.mean(errors), "±",
          numpy.sqrt(numpy.var(errors)))
    print("The time is", numpy.mean(timer), "±",
          numpy.sqrt(numpy.var(timer)))
# Example #2  (scraped-listing separator; the stray "0" was a vote count)
def variable_elimination(uai_file_name, evidence_file_name):
    """
    Main function used to run the variable elimination algorithm.

    @param uai_file_name: The name of the main uai file
    @param evidence_file_name: The name of the evidence file
    @return: log10 of the partition function
    """
    (num_of_var, cardinalities, num_of_cliques, num_of_var_in_clique,
     var_in_clique, distribution_array) = get_data_uai.get_uai_data(
        uai_file_name)
    evidence = get_evidence_data.get_evidence(evidence_file_name)
    min_degree_for_each_var, sorted_variable = helper.compute_ordering(
        num_of_var, var_in_clique, evidence)
    var_in_clique, distribution_array = helper.instantiate(
        num_of_var_in_clique, evidence, cardinalities, var_in_clique,
        distribution_array)
    # Eliminate variables in the computed order.  Consumed cliques are marked
    # with None (not removed) so the list indices stay stable during the scan.
    for each_var in sorted_variable:
        each_clique = 0
        while each_clique < len(var_in_clique):
            each_clique_2 = each_clique + 1
            while each_clique_2 < len(var_in_clique):
                # Multiply into the current clique every later clique that
                # also mentions the variable being eliminated.  Truthiness
                # covers both None markers and emptied variable lists
                # (equivalent to the original "is not None and len != 0").
                if (var_in_clique[each_clique]
                        and var_in_clique[each_clique_2]
                        and each_var in var_in_clique[each_clique]
                        and each_var in var_in_clique[each_clique_2]):
                    new_factor, var_in_output = helper.product_of_factors(
                        distribution_array[each_clique],
                        distribution_array[each_clique_2],
                        var_in_clique[each_clique],
                        var_in_clique[each_clique_2], cardinalities)
                    # Fold clique_2 into the current clique and retire it.
                    distribution_array[each_clique] = new_factor
                    distribution_array[each_clique_2] = None
                    var_in_clique[each_clique] = var_in_output
                    var_in_clique[each_clique_2] = None
                    num_of_var_in_clique[each_clique_2] = 0
                    num_of_var_in_clique[each_clique] = len(var_in_output)
                    num_of_cliques -= 1
                each_clique_2 += 1
            if (var_in_clique[each_clique]
                    and each_var in var_in_clique[each_clique]):
                # All factors mentioning each_var are now merged into this
                # clique, so the variable can be summed out.
                new_factor, var_in_output = helper.sum_out(
                    distribution_array[each_clique], each_var,
                    var_in_clique[each_clique], cardinalities)
                distribution_array[each_clique] = new_factor
                var_in_clique[each_clique] = var_in_output
            each_clique += 1
    # Multiply the surviving factor values to obtain the partition function.
    z = 1
    for each_clique in distribution_array:
        if each_clique is not None:
            for each_val in each_clique:
                # Zero entries are skipped — presumably instantiated-out
                # slots; NOTE(review): confirm this cannot mask a genuinely
                # zero partition function.
                if each_val != 0:
                    z *= each_val
    # Return the partition function in log10 space.
    return log10(z)
# Example #3  (scraped-listing separator; the stray "0" was a vote count)
def main():
    """
    Dispatch to the normal ("-vec") or adaptive ("-avec") sampling-VE run,
    printing the resulting estimate and the elapsed wall-clock time.

    Reads module-level globals: type_of_algorithm, pr_file_name,
    uai_file_name, evidence_file_name, w_cutset, num_samples.
    :return: None
    """
    # The reference probability is loaded unconditionally, exactly as before
    # (it has the side effect of reading the PR file).
    log_actual_pr = log10(get_pr.get_pr(pr_file_name))
    if type_of_algorithm not in ("-vec", "-avec"):
        print("Please give correct algorithm name")
        return
    started = time.time()
    # Shared preprocessing: load the model, apply the evidence.
    (num_of_var, cardinalities, num_of_cliques, num_of_var_in_clique,
     var_in_clique, distribution_array) = get_data_uai.get_uai_data(
        uai_file_name)
    evidence = get_evidence_data.get_evidence(evidence_file_name)
    var_in_clique, distribution_array = helper.instantiate(
        num_of_var_in_clique, evidence, cardinalities, var_in_clique,
        distribution_array)
    if type_of_algorithm == "-vec":
        result = sampling_VE.sampling_VE(
            num_of_var, cardinalities, num_of_cliques, num_of_var_in_clique,
            var_in_clique, distribution_array, w_cutset, num_samples)
        elapsed = time.time() - started
        print(
            "The estimate for partition function or the probability of evidence is",
            result)
    else:
        result = sampling_VE_adaptive_proposal_distribution.sampling_VE(
            num_of_var, cardinalities, num_of_cliques, num_of_var_in_clique,
            var_in_clique, distribution_array, w_cutset, num_samples)
        elapsed = time.time() - started
        print(
            "The estimate partition function or the probability of evidence is",
            result)
    print("The time is", elapsed)
# Example #4  (scraped-listing separator; the stray "0" was a vote count)
def main():
    """
    Benchmark the normal and the adaptive sampling-VE estimators on each
    (uai, evidence, PR) file triple over a grid of (w_cutset, num_samples)
    settings, print per-run diagnostics, and dump the error/time tables to
    per-file text files.

    @return: (final_output, timer) — nested dicts keyed by file name, then
        "normal"/"adaptive", then the (w_cutset, num_samples) tuple.
    """
    # Raw strings are required here: in the original non-raw literals the
    # "\1" sequence was an octal escape producing chr(1), so these glob
    # patterns could never match the files on disk.
    uai_file = glob.glob(r"D:\Spring 20\Stats in AI and ML\HW\HW3\Code\1.uai")
    evid_file = glob.glob(
        r"D:\Spring 20\Stats in AI and ML\HW\HW3\Code\1.uai.evid")
    pr_file = glob.glob(
        r"D:\Spring 20\Stats in AI and ML\HW\HW3\Code\1.uai.PR")
    files = zip(uai_file, evid_file, pr_file)
    num_samples = [10]
    w_cutset = [1]
    # Full grid of (w_cutset, num_samples) settings to benchmark.
    c = tuple(itertools.product(w_cutset, num_samples))
    final_output = dict()
    timer = dict()
    for each_file in files:
        # Basename without extension ("...\Code\1.uai" -> "1"); the original
        # indexed [1] and therefore picked a directory component instead.
        file_name = each_file[0].split("\\")[-1].split(".")[0]
        print("The file we are processing is", file_name)
        uai_file_name = each_file[0]
        evidence_file_name = each_file[1]
        pr_file_name = each_file[2]
        final_output[file_name] = {"normal": {}, "adaptive": {}}
        timer[file_name] = {"normal": {}, "adaptive": {}}
        for each_val in c:
            print("The value of c is", each_val)
            error_for_iter_normal = []
            error_for_iter_adaptive = []
            time_for_iter_normal = []
            time_for_iter_adaptive = []
            for each_iter in range(1):
                random.seed(each_iter)
                # --- normal sampling-VE run ---
                start_time_normal = time.time()
                (num_of_var, cardinalities, num_of_cliques,
                 num_of_var_in_clique, var_in_clique,
                 distribution_array) = get_data_uai.get_uai_data(
                    uai_file_name)
                evidence = get_evidence_data.get_evidence(evidence_file_name)
                var_in_clique, distribution_array = helper.instantiate(
                    num_of_var_in_clique, evidence, cardinalities,
                    var_in_clique, distribution_array)
                estimate_1 = sampling_VE.sampling_VE(
                    num_of_var, cardinalities, num_of_cliques,
                    num_of_var_in_clique, var_in_clique, distribution_array,
                    each_val[0], each_val[1])
                time_normal = (time.time() - start_time_normal)
                time_for_iter_normal.append(time_normal)
                # --- adaptive-proposal run; the model is reloaded because
                # instantiate/sampling mutate the clique structures ---
                start_time_adaptive = time.time()
                (num_of_var, cardinalities, num_of_cliques,
                 num_of_var_in_clique, var_in_clique,
                 distribution_array) = get_data_uai.get_uai_data(
                    uai_file_name)
                evidence = get_evidence_data.get_evidence(evidence_file_name)
                var_in_clique, distribution_array = helper.instantiate(
                    num_of_var_in_clique, evidence, cardinalities,
                    var_in_clique, distribution_array)
                estimate_2 = sampling_VE_adaptive_proposal_distribution.sampling_VE(
                    num_of_var, cardinalities, num_of_cliques,
                    num_of_var_in_clique, var_in_clique, distribution_array,
                    each_val[0], each_val[1])
                time_adaptive = (time.time() - start_time_adaptive)
                time_for_iter_adaptive.append(time_adaptive)
                print(estimate_1, estimate_2)
                # Relative error in log10 space against the reference PR.
                log_actual_pr = log10(get_pr.get_pr(pr_file_name))
                log_predicted_pr = log10(estimate_1)
                log_predicted_pr_adaptive = log10(estimate_2)
                actual = (log_actual_pr - log_predicted_pr) / log_actual_pr
                adaptive = (log_actual_pr -
                            log_predicted_pr_adaptive) / log_actual_pr
                error_for_iter_normal.append(actual)
                error_for_iter_adaptive.append(adaptive)
                print("Normal :-", actual, time_normal)
                print("adaptive :-", adaptive, time_adaptive)

            # Store the full per-iteration lists; the original kept only the
            # last iteration's scalar time, losing data for >1 iterations and
            # being inconsistent with how the errors are stored.
            timer[file_name]["adaptive"][each_val] = time_for_iter_adaptive
            timer[file_name]["normal"][each_val] = time_for_iter_normal
            final_output[file_name]["adaptive"][
                each_val] = error_for_iter_adaptive
            final_output[file_name]["normal"][each_val] = error_for_iter_normal
        try:
            # Context managers close the handles even if write() raises, and
            # only I/O failures are caught (the original bare except hid any
            # error, including typos and KeyboardInterrupt).
            with open("error_" + file_name + ".txt", "w") as f:
                f.write(str(final_output))
            with open("time_" + file_name + ".txt", "w") as f:
                f.write(str(timer))
        except OSError:
            print("IO error")
    print(final_output)
    print(timer)

    return final_output, timer
# Example #5  (scraped-listing separator; the stray "0" was a vote count)
def sampling_VE(num_of_var, cardinalities, num_of_cliques,
                num_of_var_in_clique, var_in_clique, distribution_array,
                w_cutset_bound, num_samples):
    """
    Importance-sampling estimator for the partition function Z: sample the
    variables in a w-cutset, run variable elimination on the remainder, and
    average the importance weights.

    @param num_of_var: The number of variables in the PGM
    @param cardinalities: The cardinalities of the PGM
    @param num_of_cliques: The number of cliques in the PGM
    @param num_of_var_in_clique: The number of variables in each clique
    @param var_in_clique: The variables in each clique
    @param distribution_array: The distribution array for the given PGM
    @param w_cutset_bound: The cutset bound for the PGM
    @param num_samples: The number of samples for the algorithm
    @return: The predicted value of Z or probability
    """
    X = w_cutset(num_of_var, var_in_clique, w_cutset_bound)
    cardinalities = np.array(cardinalities)
    num_of_var_in_x = len(X)
    cardinalities_of_x = cardinalities[X]
    var_in_clique_X = X
    if num_of_var_in_x != 0:
        # NOTE(review): this uses 1 / sum(log10(cardinalities)) as the
        # "uniform" proposal mass rather than 1 / prod(cardinalities);
        # kept as-is to preserve behavior — verify the intent.
        sum_of_log_of_cardinalities = np.sum(np.log10(cardinalities_of_x))
        distribution_array_X = dict()
        distribution_array_X["FIRST"] = [1 / sum_of_log_of_cardinalities]
        uniform_dist = 1 / sum_of_log_of_cardinalities
    else:
        # Empty cutset: the proposal stays a plain list, which keeps it
        # uniform and disables the adaptive update below.
        distribution_array_X = [1]
        uniform_dist = 1
    num = {}
    denom = 0
    weights = []
    for each_N in range(num_samples):
        # Work on copies so the caller's model is not mutated between samples.
        distribution_array1 = distribution_array.copy()
        var_in_clique1 = var_in_clique.copy()
        num_of_var_in_clique1 = num_of_var_in_clique.copy()
        num_of_var1 = num_of_var
        # Draw one uniform assignment for the cutset variables, expressed as
        # (variable, value) evidence pairs.
        evidence = []
        for each_evidence in range(num_of_var_in_x):
            evidence.append((var_in_clique_X[each_evidence],
                             randint(0,
                                     cardinalities_of_x[each_evidence] - 1)))
        var_in_clique1, distribution_array1 = helper.instantiate(
            num_of_var1, evidence, cardinalities, var_in_clique1,
            distribution_array1)
        # Exact inference on the remaining (bounded-width) network.
        var_elem_sol = variable_elimination.variable_elimination(
            num_of_var1, cardinalities, num_of_cliques, num_of_var_in_clique1,
            var_in_clique1, distribution_array1, evidence)
        sample = [one_value for (var, one_value) in evidence]
        sample_string = sample.__str__()
        # Bug fix: the original tested "distribution_array_X is list", an
        # identity comparison against the type object that is always False;
        # isinstance makes the empty-cutset case take the uniform branch.
        if isinstance(distribution_array_X, list):
            Q = uniform_dist
        else:
            Q = distribution_array_X.get(sample_string, uniform_dist)
        weight = var_elem_sol / Q
        weights.append(weight)
        denom += weight
        num[sample_string] = num.get(sample_string, 0) + weight
        # Every 100 samples, refresh the proposal from the accumulated
        # normalized weights (skipped entirely for the empty cutset).
        if (each_N % 100 == 0 and each_N != 0
                and not isinstance(distribution_array_X, list)):
            distribution_array_X = {each: num[each] / denom for each in num}
    weights = np.array(weights)
    # Bug fix: the original wrote ".all" without calling it; a bound method
    # is always truthy, so the log-sum-exp branch was unreachable.
    if (weights[1:] == weights[:-1]).all():
        z = np.sum(weights)
    else:
        z = helper.threshold(helper.logsumexp(weights))
    return z / num_samples