# Number of worlds sampled for training, and again for evaluation.  An inline
# note in the original code ("#500") suggests 500 for a full-length run.
_NUM_WORLDS = 50
# Forward passes per sampled world (gradient steps while training).
_STEPS_PER_WORLD = 10
_LEARNING_RATE = 0.01
# Per-body-predicate variable lists handed to MetaRule; both body predicates
# are addressed through the columns 'x' and 'y'.
# NOTE(review): exact join semantics are defined by MetaRule - confirm there.
_RULE_VARS = [[['x', 'y'], ['x', 'y']]]


def _sample_world(dim, obs):
    """Return the 20 tables produced by ``genblocksworld(dim, obs)`` as a tuple.

    Layout (groups of four, each ordered left, right, up, down):
    ``[0:4]`` go_*, ``[4:8]`` *_hasobstacle, ``[8:12]`` *_hastarget,
    ``[12:16]`` *_noobstacle, ``[16:20]`` *_notarget.

    Raises:
        ValueError: if the generator does not return exactly 20 items.
    """
    world = tuple(genblocksworld(dim, obs))
    if len(world) != 20:
        raise ValueError(
            "genblocksworld returned %d tables, expected 20" % len(world))
    return world


def _build_direction_rules(world, alpha, option,
                           prior_obstacles=None, prior_targets=None):
    """Build the obstacle/target MetaPredicates and MetaRule per direction.

    For each of the four directions (left, right, up, down, in that order) a
    fresh obstacle MetaPredicate, target MetaPredicate and MetaRule are
    created.  When ``prior_obstacles`` / ``prior_targets`` are given, the
    ``alpha`` attribute of the matching prior predicate is assigned onto the
    new predicate before the rule is constructed, so learned weights carry
    over to the new world.

    Returns:
        ``(obstacles, targets, rules)`` - three lists of length four.
    """
    has_obstacle, has_target = world[4:8], world[8:12]
    no_obstacle, no_target = world[12:16], world[16:20]
    # Candidate order matters: it fixes which alpha entry maps to which table.
    obstacle_tables = (*no_obstacle, *has_obstacle)
    target_tables = (*has_target, *no_target)

    obstacles, targets, rules = [], [], []
    for slot in range(4):
        obstacle = MetaPredicate(
            [BasePredicate(table) for table in obstacle_tables], option)
        # Explicit None check: object truthiness is not a reliable
        # "has a previous predicate" signal.
        if prior_obstacles is not None:
            obstacle.alpha = prior_obstacles[slot].alpha

        target = MetaPredicate(
            [BasePredicate(table) for table in target_tables], option)
        if prior_targets is not None:
            target.alpha = prior_targets[slot].alpha

        rule = MetaRule([obstacle, target], _RULE_VARS, alpha, False, option)

        obstacles.append(obstacle)
        targets.append(target)
        rules.append(rule)
    return obstacles, targets, rules


def _aligned_labels(go_table, rule):
    """Return the 'Label' column of ``go_table`` ordered like ``rule.df``.

    A temporary 'id' column holding ``rule.df``'s index is added so the
    right-merge on (x, y) can be sorted back into the rule's row order; the
    column is removed again even if the merge fails.
    """
    rule.df['id'] = list(rule.df.index)
    try:
        merged = go_table.merge(
            rule.df, on=["x", "y"], how='right').sort_values('id')
    finally:
        rule.df.drop(['id'], axis=1, inplace=True)
    return torch.FloatTensor(merged[["Label"]].values)


def _mean_log_reward(rules, x, y):
    """Evaluate the four rules on ``x`` and return the mean log-reward tensor.

    Rule outputs are concatenated column-wise, each column is divided by its
    sum over dimension 0, the log is taken, and the result is weighted by the
    labels ``y``, summed over dimension 0 and averaged.
    """
    outputs = []
    for rule in rules:
        yhat, _ = rule(x)  # second return value (slacks) is not used here
        outputs.append(yhat)
    unnorm_probs = torch.cat(outputs, 1)
    log_probs = torch.log(
        torch.div(unnorm_probs, torch.sum(unnorm_probs, 0)))
    return torch.sum(torch.mul(y, log_probs), 0).mean()


def main():
    """Learn and evaluate movement rules for the blocksworld task.

    Example invocation: ``python3 blocksworld.py -n 5 -o 5 -a 0.8``

    Training: for each of ``_NUM_WORLDS`` sampled worlds, fresh rules are
    built (inheriting the predicates' ``alpha`` from the previous world),
    optimised with Adam for ``_STEPS_PER_WORLD`` steps, and the first/last
    reward plus the learned predicates are printed.

    Evaluation: for another ``_NUM_WORLDS`` sampled worlds, rules are built
    with the final training ``alpha`` values and their rewards are collected
    without any parameter update; the list of rewards is printed at the end.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-n", "--size", required=True, type=int,
                        help="number of cells in blocksworld")
    parser.add_argument("-o", "--obstacles", required=True, type=int,
                        help="number of obstacles in blocksworld")
    parser.add_argument("-a", "--alpha", required=True, type=float,
                        help="LNN hyperparameter alpha")
    cl_args = parser.parse_args()

    dim = cl_args.size
    obs = cl_args.obstacles
    a = cl_args.alpha
    option = 1

    # The basic rule structure is:
    #     obstacle pred \wedge target pred -> go pred
    # The problem is that we don't know which obstacle predicate is relevant
    # to moving in which direction and, similarly, which target predicate is
    # relevant to moving in which direction.  This is the learning task.
    # So we create all possible obstacle predicates and their negations and
    # encapsulate these in a MetaPredicate to express a choice over them.
    # Similarly, we create all possible target predicates and their negations
    # and encapsulate them in a MetaPredicate to express a choice.  Lastly,
    # we use a MetaRule object to express a conjunction between the chosen
    # obstacle predicate (or its negation) and the chosen target predicate
    # (or its negation).
    obstacles = targets = None
    for i in range(_NUM_WORLDS):
        world = _sample_world(dim, obs)
        obstacles, targets, rules = _build_direction_rules(
            world, a, option, obstacles, targets)

        y = torch.cat(
            [_aligned_labels(go, rule)
             for go, rule in zip(world[:4], rules)], 1)

        # We learn the rules for all four directions simultaneously.
        rewards = []
        optimizer = optim.Adam(
            [{'params': rule.parameters()} for rule in rules],
            lr=_LEARNING_RATE)

        x = torch.arange(len(rules[0].df.index))
        for _ in range(_STEPS_PER_WORLD):
            for rule in rules:
                rule.train()
            optimizer.zero_grad()

            reward = _mean_log_reward(rules, x, y)
            loss = -reward
            rewards.append(reward.item())
            loss.backward()
            optimizer.step()

        np.set_printoptions(precision=3, suppress=True)
        first_reward = str(np.around(rewards[0], decimals=3))
        last_reward = str(np.around(rewards[-1], decimals=3))
        print("Epoch " + str(i) + " rewards: " + first_reward
              + " -> " + last_reward)

        # Print the parameters learned so far.
        if option == 0:
            print_predicates1(*obstacles, *targets, *rules)
        else:
            print_predicates_neurallp(*obstacles, *targets)

    rewards = []
    for _ in range(_NUM_WORLDS):
        world = _sample_world(dim, obs)
        _, _, test_rules = _build_direction_rules(
            world, a, option, obstacles, targets)

        y = torch.cat(
            [_aligned_labels(go, rule)
             for go, rule in zip(world[:4], test_rules)], 1)

        # Size the row index from the freshly built test rule rather than
        # reusing the tensor left over from the last training world.
        x = torch.arange(len(test_rules[0].df.index))

        # Evaluation only: no gradients are needed.
        with torch.no_grad():
            # NOTE(review): each test world is scored _STEPS_PER_WORLD times
            # with no update in between; kept so the printed list has the
            # same length as before - confirm whether one pass would do.
            for _ in range(_STEPS_PER_WORLD):
                rewards.append(_mean_log_reward(test_rules, x, y).item())

    print(rewards)
#creating inv name = "inv" + name cols = df.columns.tolist() inv_df = df[[cols[1], cols[0]]].copy() inv_df.columns = [cols[0], cols[1]] rel_names_train.append(name) relations_train.append(inv_df) labels_df_train.columns = [attr_name + "0", attr_name + "3"] labels_df_train['Label'] = 1.0 body0 = BaseMetaPredicate(relations_train) print("done body0 (" + str(time.time()-begtime) + "s)") body1 = copy.deepcopy(body0) body1.df.columns = [attr_name + "2", attr_name + "3"] join = MetaRule([body0, body1], [[[attr_name + "1"], [attr_name + "2"]]], alpha, False) print("done join (" + str(time.time()-begtime) + "s)") proj = Project(join, [attr_name + "0", attr_name + "3"]) print("done project (" + str(time.time()-begtime) + "s)") metap = copy.deepcopy(body0) metap.df.columns = [attr_name + "0", attr_name + "3"] disj = DisjunctionRule([metap, proj], alpha, 0) print("done disjunction (" + str(time.time()-begtime) + "s)") meta = disj df = labels_df_train label = 'Label' step = 1e-3 batch_size = 32 epochs = 1000 y = align_labels(meta, df, label)
elif len(colnames) == 1: action_rel_names_train.append(name) action_relations_train.append(BasePredicate(df)) action_attr = colnames[0] else: tails_relation_train = BasePredicate(df) projs = [] for i in range(numrules): join_attrs = [] body = [tails_relation_train] for j in range(rulelen): body.append(MetaPredicate(action_relations_train)) join_attrs.append([[action_attr], [action_attr]]) join = MetaRule(body, join_attrs, alpha, True) proj = Project(join, [sentence_attr]) projs.append(proj) disj = DisjunctionRule(projs, alpha, 0.5) if numrules > 1 else DisjunctionRule( projs, alpha, 0.0) #train(disj, labels_df_train, 'Label', 1e-1, 64, 5, True) #1e-2, meta = disj df = labels_df_train label = 'Label' step = 1e-3 batch = 64 epochs = 50 use_balanced = True y = align_labels(meta, df, label)