def get_rewrites(self, ast):
    """Collect the rewrites of *ast* recorded in the ``Rewrites`` table.

    The loose-constraint template of *ast* (``data_tools.ast2template``
    with ``loose_constraints=True``) is used as the lookup key: every row
    whose ``s1`` column equals that template contributes its ``s2`` value,
    which is applied to *ast* through ``rewrite``.

    Args:
        ast: The tree to rewrite. It is always part of the result, so it
            must be hashable.

    Returns:
        A set holding *ast* itself plus every result of
        ``rewrite(ast, s2)`` that is not ``None``.
    """
    rewrites = {ast}
    source_template = data_tools.ast2template(ast, loose_constraints=True)
    rows = self.cursor.execute(
        "SELECT s1, s2 FROM Rewrites WHERE s1 = ?", (source_template,))
    # The s1 column only echoes the lookup key, so it is discarded instead
    # of being allowed to overwrite the template variable.
    for _, target_template in rows:
        rewritten = rewrite(ast, target_template)
        # rewrite() signals "no result" with None; skip those.
        if rewritten is not None:
            rewrites.add(rewritten)
    return rewrites
# NOTE(review): the statements below continue a dataset-statistics report
# whose beginning lies above this view; train_set, dev_set, test_set and
# train_by_nl are used here but assigned there. Confirm the enclosing scope
# and indentation against the full file.
print("train entropy = {}".format(entropy(train_set)))

# Dev split, grouped with group_data_by_nl. use_nl_temp is switched on only
# when the dataset name starts with "bash".
dev_by_nl = group_data_by_nl(dev_set, use_nl_temp=FLAGS.dataset.startswith("bash"))
print("dev cmd/nl ratio = {}".format(ratio(dev_by_nl, 1)))
print("dev %nl(cmd+) = {}".format(pp(dev_by_nl)))
print("dev nl overlap = {}".format(overlap(train_by_nl, dev_by_nl)))
print("dev entropy = {}".format(entropy(dev_set)))

# Test split, same grouping and the same four figures as the dev split.
test_by_nl = group_data_by_nl(test_set, use_nl_temp=FLAGS.dataset.startswith("bash"))
print("test cmd/nl ratio = {}".format(ratio(test_by_nl, 1)))
print("test %nl(cmd+) = {}".format(pp(test_by_nl)))
print("test nl overlap = {}".format(overlap(train_by_nl, test_by_nl)))
print("test entropy = {}".format(entropy(test_set)))

# Entropy over the concatenation of all three splits.
print("total entropy = {}".format(entropy(train_set + dev_set + test_set)))

# Second pass: the same splits grouped with group_data_by_cm. use_cm_temp
# follows the same "bash" prefix rule as use_nl_temp above.
train_by_cm = group_data_by_cm(train_set, use_cm_temp=FLAGS.dataset.startswith("bash"))
# Number of groups produced for the training split.
print(len(train_by_cm))
print("train nl/cmd ratio = {}".format(ratio(train_by_cm, 0)))
print("train %cmd(nl+) = {}".format(pp(train_by_cm)))

dev_by_cm = group_data_by_cm(dev_set, use_cm_temp=FLAGS.dataset.startswith("bash"))
print("dev nl/cmd ratio = {}".format(ratio(dev_by_cm, 0)))
print("dev %cmd(nl+) = {}".format(pp(dev_by_cm)))
print("dev cm overlap = {}".format(overlap(train_by_cm, dev_by_cm)))

test_by_cm = group_data_by_cm(test_set, use_cm_temp=FLAGS.dataset.startswith("bash"))
print("test nl/cmd ratio = {}".format(ratio(test_by_cm, 0)))
print("test %cmd(nl+) = {}".format(pp(test_by_cm)))
print("test cm overlap = {}".format(overlap(train_by_cm, test_by_cm)))

# Script entry point: parse the first command-line argument with
# data_tools.paren_parser, pretty-print the resulting tree, then print its
# template built with loose_constraints=True and arg_type_only=True.
if __name__ == "__main__":
    ast = data_tools.paren_parser(sys.argv[1])
    data_tools.pretty_print(ast)
    print(data_tools.ast2template(
        ast, loose_constraints=True, arg_type_only=True))