Esempio n. 1
0
def get_formulas(qrole, filename):
    """Return the set of formulas involving *qrole* read from a formula matrix file.

    Each formula is a tuple of role names, sorted alphabetically, that always
    includes ``qrole``.  The matrix file is loaded via
    ``utils.load_flattened_data``: row 0 is the role-name header, the remaining
    rows form a square matrix whose column for ``qrole`` is inspected.

    If "auto" appears in *filename*, the single highest-scoring partner role is
    selected automatically; otherwise every non-zero entry in the column is a
    manually assigned formula id, and roles sharing an id are grouped into one
    formula.
    """
    auto = "auto" in filename  # idiomatic boolean (was `True if ... else False`)
    data = utils.load_flattened_data(filename)
    corr_mat = np.asarray(data[1:])
    header = data[0]
    role2idx = {role: idx for idx, role in enumerate(header)}
    idx2role = dict(enumerate(header))
    formulas = set()
    qidx = role2idx[qrole]
    corr_col = corr_mat[:, qidx]
    if auto:  # auto-select the top-1 entry of the column
        # BUG FIX: the original assigned the raw ndarray of indices to `bidx`
        # and then called `bidx.keys()` below, which raised AttributeError on
        # every "auto" file.  Wrap the selected indices in the same
        # {formula_id: [role indices]} mapping the manual branch produces.
        top = np.argpartition(corr_col, -1)[-1:]
        bidx = {1: [int(i) for i in top]}
    else:  # select all manually set formulas in the matrix (non-zero entries)
        bidx = {}
        for idx in range(corr_col.shape[0]):
            formula_id = int(corr_col[idx])
            if formula_id > 0:
                # group role indices by their shared formula id
                bidx.setdefault(formula_id, []).append(idx)
    for role_idxs in bidx.values():
        formula = tuple(sorted([idx2role[pidx] for pidx in role_idxs] + [qrole]))
        formulas.add(formula)
    return formulas
Esempio n. 2
0
def get_formulas(filename):
    """Return the set of pairwise formulas encoded in a formula matrix file.

    Each formula is a sorted 2-tuple ``(role_a, role_b)`` of role names.  The
    file is loaded via ``utils.load_flattened_data``: row 0 is the role-name
    header, the remaining rows form the matrix.  For every role's column,
    partner roles are selected either automatically (top-1 score, when "auto"
    appears in *filename*) or manually (every strictly positive entry).
    """
    auto = "auto" in filename  # idiomatic boolean (was `True if ... else False`)
    data = utils.load_flattened_data(filename)
    corr_mat = np.asarray(data[1:])
    header = data[0]
    role2idx = {role: idx for idx, role in enumerate(header)}
    idx2role = dict(enumerate(header))
    formulas = set()
    for qrole, qidx in role2idx.items():
        corr_col = corr_mat[:, qidx]
        if auto:
            # top-1 entry of the column (the original comment said "top 2",
            # but argpartition(corr_col, -1)[-1:] selects exactly one index)
            bidx = np.argpartition(corr_col, -1)[-1:]
        else:
            # every manually set (non-zero) entry marks a partner role
            bidx = [
                idx for idx in range(corr_col.shape[0]) if corr_col[idx] > 0.0
            ]
        for idx in bidx:
            formulas.add(tuple(sorted((idx2role[idx], qrole))))
    return formulas
Esempio n. 3
0
                     type=str,
                     help="(.txt)",
                     nargs="?",
                     default="./data/val_data_negative.txt")
 parser.add_argument("--roles_file",
                     type=str,
                     help="(.txt)",
                     nargs="?",
                     default="./data/role_to_values.txt")
 args = parser.parse_args()
 # loads the MLN, DBs, and instances
 with open(args.roles_file, "r") as f:
     roles = eval(f.readlines()[0])
 mln = MLN.load(args.input_mln)
 dbs = Database.load(mln, args.positive_database)
 p_examples = utils.load_flattened_data(args.positive_dataset)
 n_examples = utils.load_flattened_data(args.negative_dataset)
 test_times = []
 # begins testing roles
 for role in roles.keys():
     start = time()
     # creates testing DBs with labels
     test_dbs = generate_test_dbs(role, dbs)
     # gets MLN scores
     scores = score_mln(mln, role, test_dbs)
     # makes instance-score datastructure
     instance_scores = scores2instance_scores(role, roles, p_examples,
                                              n_examples, scores)
     # gets metrics for the role
     utils.compute_metric_scores(p_examples,
                                 n_examples,
Esempio n. 4
0
 parser.add_argument("--formula_file",
                     type=str,
                     help="(.mx)",
                     nargs="?",
                     default="./data/formula_matrix_auto.mx")
 parser.add_argument("--output_mln",
                     type=str,
                     help="(.mln)",
                     nargs="?",
                     default="./models/initial.mln")
 args = parser.parse_args()
 # loads the data for MLN
 roles = utils.load_roles(args.roles_file)
 instances = []
 for dataset in args.input_datasets:
     instances += utils.load_flattened_data(dataset)
 role_constraints = utils.get_role_constraints(roles, instances)
 formulas = get_formulas(args.formula_file)
 domains = get_domains(roles, instances)
 # generates the markov logic network
 mln = MLN(logic="FirstOrderLogic", grammar="PRACGrammar")
 for domain, values in domains.items():  # domains
     for value in values:
         if len(value) > 1:
             const = ''.join(value)
         elif len(value) > 0:
             const = value[0]
         else:
             const = "None"
         mln.update_domain({domain: [const]})
 for role in roles.keys():  # predicates
Esempio n. 5
0
            instance.append(tuple((role, values)))
    return mlninstances


if __name__ == "__main__":
    # Command-line entry point: turn role-value datasets into MLN databases.
    parser = ArgumentParser(description="Role-Value Dataset 2 MLN Database")
    parser.add_argument("--input_mln", type=str, help="(.mln)", nargs="?",
                        default="./models/class_initial.mln")
    parser.add_argument("--input_datasets", type=str, help="(.txt)", nargs="*",
                        default=["./data/train_data.txt"])
    parser.add_argument("--output_database", type=str, help="(.db)", nargs="?",
                        default="./data/train.db")
    parser.add_argument("--roles_file", type=str, help="(.txt)", nargs="?",
                        default="./data/role_to_values.txt")
    args = parser.parse_args()
    # load the initial MLN and the role definitions it relies on
    mln = MLN.load(args.input_mln)
    roles = utils.load_roles(args.roles_file)
    # accumulate atoms from every input dataset: flatten, map role-values
    # onto the MLN's vocabulary, then format the result as atoms
    atoms = []
    for dataset_path in args.input_datasets:
        records = utils.load_flattened_data(dataset_path)
        records = rvs2mlnrvs(mln, roles, records)
        atoms += utils.format_instances_rv2atoms(records)
    # build the databases and persist them to the output path
    dbs = generate_databases(mln, atoms)
    with open(args.output_database, "w") as f:
        Database.write_dbs(dbs, f)
    print("The database for the MLN is in " + args.output_database + ".")