Example #1
0
    def create_initial_la(self):
        """Create the initial latent annotation for this experiment.

        If ``self.induction_settings.feature_la`` is set, nonterminal splits
        and root/rule weights are obtained from ``build_nont_splits_dict``
        (given the base grammar, the feature log and the nonterminal map),
        several statistics about them are written to ``self.logger``, and a
        latent annotation is built from them via ``build_PyLatentAnnotation``.
        ``add_random_noise`` is then called on it with ``self.organizer.seed``
        and the split ids are stored in ``self.split_id``.

        Otherwise the call is delegated to the parent class.

        :return: the initial latent annotation
        """
        if not self.induction_settings.feature_la:
            return super(ConstituentSMExperiment, self).create_initial_la()

        def log(*fields):
            # All diagnostics of this method go to the experiment's logger.
            print(*fields, file=self.logger)

        log("building initial LA from features")
        splits, root_weights, rule_weights, split_ids = build_nont_splits_dict(
            self.base_grammar,
            self.feature_log,
            self.organizer.nonterminal_map,
            feat_function=self.induction_settings.feat_function)
        log("number of nonterminals:", len(splits))
        log("total splits", sum(splits))

        # Report the nonterminal with the largest number of splits together
        # with the keys recorded for it in the split id table.
        largest = max(splits)
        largest_idx = splits.index(largest)
        largest_nont = self.organizer.nonterminal_map.index_object(largest_idx)
        log("max. nonterminal splits", largest, "at index ", largest_idx,
            "i.e.,", largest_nont)
        for split_key in split_ids[largest_nont]:
            log(split_key)

        # NOTE(review): the nonterminal "NE/1" is hard-coded here; presumably
        # this is corpus-specific debugging output -- confirm that every
        # grammar passed through this method contains it.
        log("splits for NE/1")
        for split_key in split_ids["NE/1"]:
            log(split_key)
        for rule in self.base_grammar.lhs_nont_to_rules("NE/1"):
            log(rule, rule_weights[rule.get_idx()])

        log("number of rules", len(rule_weights))
        log("total split rules", sum(len(weights) for weights in rule_weights))
        zero_weight_count = sum(1
                                for weights in rule_weights
                                for weight in weights
                                if weight == 0.0)
        log("number of split rules with 0 prob.", zero_weight_count)

        latent_annotation = build_PyLatentAnnotation(
            splits,
            root_weights,
            rule_weights,
            self.organizer.grammarInfo,
            self.organizer.storageManager)
        latent_annotation.add_random_noise(seed=self.organizer.seed)
        self.split_id = split_ids
        return latent_annotation
    def test_induction_2(self):
        """Extract a grammar from ``self.tree3`` and print its nonterminal splits.

        The grammar is extracted by ``fringe_extract_lcfrs`` with a
        left-branching partitioning, ``"child"`` naming, ``isolate_pos=True``
        and feature logging into ``features``. After ``make_proper()`` the
        grammar and the feature log are passed to ``build_nont_splits_dict``
        (with ``pos_cat_feats`` as feature function and ``debug=True``), and
        the resulting nonterminal splits, root weights and rule weights are
        printed. The test makes no assertions; it only checks that the
        pipeline runs through.
        """
        def rec_part(tree):
            # The partitioning is derived from the length of the tree's id yield.
            return left_branching_partitioning(len(tree.id_yield()))

        # Missing feature keys default to 0.
        features = defaultdict(int)
        grammar = fringe_extract_lcfrs(self.tree3,
                                       rec_part(self.tree3),
                                       naming="child",
                                       feature_logging=features,
                                       isolate_pos=True)
        grammar.make_proper()

        # The fourth return value is not needed here.
        nont_splits, root_weights, rule_weights, _ = build_nont_splits_dict(
            grammar,
            features,
            nonterminals=Enumerator(),
            feat_function=pos_cat_feats,
            debug=True)
        print(nont_splits)
        print(root_weights)
        print(rule_weights)
    def test_induction(self):
        """Extract three grammars, merge them and build nonterminal splits.

        Steps, each accompanied by printed output for manual inspection:

        1. ``add_to_root("VP1")`` is called on ``self.tree`` and a grammar is
           extracted from it with feature logging.
        2. A second grammar is extracted from the same tree with
           ``isolate_pos=True`` and added to the first one via ``add_gram``.
        3. A third grammar is extracted from ``self.tree2`` (also with
           ``isolate_pos=True``) and added to the first one as well.
        4. The merged grammar is made proper and handed, together with the
           first feature log, to ``build_nont_splits_dict``.

        The test makes no assertions; it only checks that the pipeline runs
        through.
        """
        naming = 'child'

        def rec_part(tree):
            # Alternative kept from an earlier version of this test:
            # fanout_k_left_to_right(tree, 1)
            return left_branching_partitioning(len(tree.id_yield()))

        def show_features(log):
            # One line per logged feature: key followed by its value.
            for feature, value in log.items():
                print(feature, value)

        def show_rules(gram):
            # One line per rule: index followed by the rule at that index.
            for position in range(len(gram.rules())):
                print(position, gram.rule_index(position))

        tree = self.tree
        tree.add_to_root("VP1")

        # First grammar: default extraction.
        merged_log = defaultdict(int)
        grammar = fringe_extract_lcfrs(
            tree,
            rec_part(tree),
            feature_logging=merged_log,
            naming=naming)
        show_features(merged_log)
        print(grammar)

        # Second grammar: same tree, extracted with isolate_pos=True.
        second_log = defaultdict(int)
        second_grammar = fringe_extract_lcfrs(
            tree,
            rec_part(tree),
            isolate_pos=True,
            feature_logging=second_log,
            naming=naming)
        print(second_grammar)
        show_features(second_log)

        print("Adding 2nd grammar to first")
        grammar.add_gram(second_grammar, feature_logging=(merged_log, second_log))
        show_rules(grammar)

        # Third grammar: extracted from the second tree of the fixture.
        print("Adding 3rd grammar to first")
        third_log = defaultdict(int)
        third_grammar = fringe_extract_lcfrs(
            self.tree2,
            rec_part(self.tree2),
            isolate_pos=True,
            feature_logging=third_log,
            naming=naming)
        grammar.add_gram(third_grammar, feature_logging=(merged_log, third_log))

        print()
        show_rules(grammar)
        print()
        print("New feature log")
        print()
        show_features(merged_log)

        grammar.make_proper()
        build_nont_splits_dict(grammar, merged_log, nonterminals=Enumerator())

        for position in (0, 2):
            print(grammar.rule_index(position))