def get_formulas(qrole, filename):
    """Collect the formulas (role tuples) that involve the query role *qrole*.

    The file is a "formula matrix": row 0 is a header of role names, each
    following row holds per-role values for one role.  If the filename
    contains "auto", the single strongest-correlated partner role is picked
    automatically; otherwise every manually set (non-zero) cell is read as a
    formula id and rows sharing an id are grouped into one formula.

    :param qrole: role name whose column of the matrix is inspected
    :param filename: path to the matrix file (auto-mode iff "auto" in name)
    :return: set of tuples of sorted role names, each tuple one formula
    """
    auto = "auto" in filename
    data = utils.load_flattened_data(filename)
    corr_mat = np.asarray(data[1:])
    header = data[0]
    role2idx = {role: idx for idx, role in enumerate(header)}
    idx2role = dict(enumerate(header))
    formulas = set()
    qidx = role2idx[qrole]
    corr_col = corr_mat[:, qidx]
    if auto:
        # auto-select the strongest partner role of the column.
        # NOTE(review): the original comment said "top 2" but only the top 1
        # row is taken ([-1:]); behavior kept as-implemented — confirm intent.
        # BUG FIX: the original assigned the raw ndarray to `bidx` and then
        # called bidx.keys() below, which raises AttributeError on the auto
        # path; wrap the selected rows in a dict under a synthetic formula id.
        top_rows = np.argpartition(corr_col, -1)[-1:]
        bidx = {1: [int(row) for row in top_rows]}
    else:
        # select all manually set formulas in the matrix (non-zero cells);
        # rows sharing the same formula id belong to one formula
        bidx = {}
        for idx in range(corr_col.shape[0]):
            formula_id = int(corr_col[idx])
            if formula_id > 0:
                bidx.setdefault(formula_id, []).append(idx)
    for partner_rows in bidx.values():
        # a formula is the sorted tuple of all partner roles plus the query role
        formula = tuple(sorted([idx2role[pidx] for pidx in partner_rows] + [qrole]))
        formulas.add(formula)
    return formulas
def get_formulas(filename):
    """Enumerate pairwise role formulas encoded in a formula matrix file.

    Row 0 of the file is a header of role names; the remaining rows form a
    square matrix.  For every role column, partner rows are chosen either
    automatically (the single highest cell, when "auto" is in the filename)
    or manually (every non-zero cell).  Each (partner, query) pair becomes a
    sorted two-role tuple.

    :param filename: path to the matrix file (auto-mode iff "auto" in name)
    :return: set of sorted role-name tuples, one per formula
    """
    auto = "auto" in filename
    data = utils.load_flattened_data(filename)
    header = data[0]
    corr_mat = np.asarray(data[1:])
    role2idx = {role: col for col, role in enumerate(header)}
    idx2role = dict(enumerate(header))
    formulas = set()
    for qrole, qidx in role2idx.items():
        corr_col = corr_mat[:, qidx]
        if auto:
            # auto select formula for top 2 of matrix
            partner_rows = np.argpartition(corr_col, -1)[-1:]
        else:
            # select all manually set formulas in matrix (non-zero)
            partner_rows = [
                row for row in range(corr_col.shape[0]) if corr_col[row] > 0.0
            ]
        for row in partner_rows:
            formulas.add(tuple(sorted((idx2role[row], qrole))))
    return formulas
# (continuation of a parser.add_argument(...) call opened before this chunk)
type=str, help="(.txt)", nargs="?", default="./data/val_data_negative.txt")
parser.add_argument("--roles_file", type=str, help="(.txt)", nargs="?",
                    default="./data/role_to_values.txt")
args = parser.parse_args()
# loads the MLN, DBs, and instances
with open(args.roles_file, "r") as f:
    # NOTE(review): eval() of raw file contents — safe only for trusted,
    # locally generated files; consider ast.literal_eval instead
    roles = eval(f.readlines()[0])
mln = MLN.load(args.input_mln)
dbs = Database.load(mln, args.positive_database)
p_examples = utils.load_flattened_data(args.positive_dataset)
n_examples = utils.load_flattened_data(args.negative_dataset)
test_times = []  # per-role wall-clock timings, started below with time()
# begins testing roles
for role in roles.keys():
    start = time()
    # creates testing DBs with labels
    test_dbs = generate_test_dbs(role, dbs)
    # gets MLN scores
    scores = score_mln(mln, role, test_dbs)
    # makes instance-score datastructure
    instance_scores = scores2instance_scores(role, roles, p_examples,
                                             n_examples, scores)
    # gets metrics for the role (call continues beyond this chunk)
    utils.compute_metric_scores(p_examples, n_examples,
parser.add_argument("--formula_file", type=str, help="(.mx)", nargs="?",
                    default="./data/formula_matrix_auto.mx")
parser.add_argument("--output_mln", type=str, help="(.mln)", nargs="?",
                    default="./models/initial.mln")
args = parser.parse_args()
# loads the data for MLN
roles = utils.load_roles(args.roles_file)
instances = []
for dataset in args.input_datasets:
    instances += utils.load_flattened_data(dataset)
role_constraints = utils.get_role_constraints(roles, instances)
formulas = get_formulas(args.formula_file)
domains = get_domains(roles, instances)
# generates the markov logic network
mln = MLN(logic="FirstOrderLogic", grammar="PRACGrammar")
for domain, values in domains.items():  # domains
    for value in values:
        # multi-token values are concatenated into a single constant;
        # empty values fall back to the literal constant "None"
        if len(value) > 1:
            const = ''.join(value)
        elif len(value) > 0:
            const = value[0]
        else:
            const = "None"
        mln.update_domain({domain: [const]})
# predicates — loop body continues beyond this chunk
for role in roles.keys():
# (tail of a function begun before this chunk — indentation inferred)
        instance.append(tuple((role, values)))
    return mlninstances


if __name__ == "__main__":
    parser = ArgumentParser(description="Role-Value Dataset 2 MLN Database")
    parser.add_argument("--input_mln", type=str, help="(.mln)", nargs="?",
                        default="./models/class_initial.mln")
    parser.add_argument("--input_datasets", type=str, help="(.txt)", nargs="*",
                        default=["./data/train_data.txt"])
    parser.add_argument("--output_database", type=str, help="(.db)", nargs="?",
                        default="./data/train.db")
    parser.add_argument("--roles_file", type=str, help="(.txt)", nargs="?",
                        default="./data/role_to_values.txt")
    args = parser.parse_args()
    # loads the initial MLN
    mln = MLN.load(args.input_mln)
    # loads data for DBs
    atoms = []
    roles = utils.load_roles(args.roles_file)
    for input_dataset in args.input_datasets:
        rv = utils.load_flattened_data(input_dataset)
        rv = rvs2mlnrvs(mln, roles, rv)
        # formats role-value to atoms
        atoms += utils.format_instances_rv2atoms(rv)
    # generates the DBs and saves
    dbs = generate_databases(mln, atoms)
    with open(args.output_database, "w") as f:
        Database.write_dbs(dbs, f)
    print("The database for the MLN is in " + args.output_database + ".")