Exemple #1
0
    def setArgs(self, art_id, sent_id, sent, idx):
        '''
            Recursively attach the dependency children of token ``idx`` as
            arguments of that token's Part.

            For every (dependency, child index) pair returned by
            ``sent.get_children(idx)`` an Argument is created on the parent
            Part, the child Part is linked back to its parent, and the
            argument is assigned to the first existing argument cluster of
            its argument type (a new one is created when none exists).
            Children that have no Part or that already have a parent Part
            are skipped.

            art_id:  article identifier used to build tree-node ids.
            sent_id: sentence index used to build tree-node ids.
            sent:    sentence object exposing ``get_children(index)``.
            idx:     index of the token whose children are attached.

            Always returns None.
        '''
        this_part = Part.getPartByRootNodeId(
            genTreeNodeID(art_id, sent_id, idx))
        this_node = this_part.getRelTreeRoot()
        node_clust = Clust.getClust(this_part.getClustIdx())
        children = sent.get_children(idx)

        if children is None:
            return None

        for dependency, child_index in children:
            child_node_id = genTreeNodeID(art_id, sent_id, child_index)
            path = Path(dependency)
            argTypeIdx = path.getArgType()
            child_part = Part.getPartByRootNodeId(child_node_id)

            # A child without a Part cannot be attached; a child that already
            # has a parent was reached through another path and is left alone.
            if child_part is None or child_part.getParPart() is not None:
                continue

            arg = Argument(this_node, path, child_part)
            argIdx = this_part.addArgument(arg)
            child_part.setParent(this_part, argIdx)
            argClustIdxs = node_clust.getArgClustIdxs(argTypeIdx)

            if argClustIdxs is None:
                argClustIdx = node_clust.createArgClust(argTypeIdx)
            else:
                argClustIdx = next(iter(argClustIdxs))

            this_part.setArgClust(argIdx, argClustIdx, clust_only=True)

            # Recurse through the instance: the bare name ``setArgs`` is not
            # resolvable from inside a method body.
            self.setArgs(art_id, sent_id, sent, child_index)

        return None
Exemple #2
0
    def mergeArg(self, clust, aci1, aci2):
        '''
            Move every argument assigned to argument cluster ``aci2`` of
            ``clust`` over to argument cluster ``aci1``.

            clust: cluster object exposing the ``_argClusts`` mapping.
            aci1:  index of the argument cluster that receives the arguments.
            aci2:  index of the argument cluster being emptied.

            Always returns None.
        '''
        source_arg_clust = clust._argClusts[aci2]

        # Walk a snapshot of the member ids -- presumably because
        # Part.setArgClust edits the live membership set (verify in Part).
        member_root_ids = source_arg_clust._partRootTreeNodeIds.copy()

        for root_id in member_root_ids:
            member = Part.getPartByRootNodeId(root_id)

            for arg_idx, assigned_clust in member._argIdx_argClustIdx.items():
                if assigned_clust != aci2:
                    continue

                member.setArgClust(arg_idx, aci1)

        return None
Exemple #3
0
    def addAgendaForNewClust(self, newClustIdx, verbose=False):
        '''
            Queue a merge candidate for every pair of distinct parts that
            belong to cluster ``newClustIdx``.

            Each unordered pair is submitted exactly once, as
            ``addAgendaAfterMergeClust(part_1, part_2)`` where the root node
            id of ``part_1`` compares greater than that of ``part_2``.

            newClustIdx: index of the cluster whose parts are paired up.
            verbose:     accepted for interface compatibility; not used here.

            Always returns None.
        '''
        part_node_ids = Part.getClustPartRootNodeIds()[newClustIdx]

        if len(part_node_ids) < 2:
            return None

        for node_id in part_node_ids:
            part_1 = Part.getPartByRootNodeId(node_id)

            for node_id2 in part_node_ids:
                # Skip (rather than stop at) ids that are not strictly
                # smaller: stopping is only correct when the container
                # iterates in ascending order, which a plain set does not
                # guarantee. For sorted containers the result is unchanged.
                if node_id <= node_id2:
                    continue

                part_2 = Part.getPartByRootNodeId(node_id2)
                self.addAgendaAfterMergeClust(part_1, part_2)

        return None
Exemple #4
0
    def initializeSent(self, ai, sj, sent, verbose=False):
        '''
            Create the per-token structures for one sentence and link them
            into an argument tree.

            The token count (excluding index 0) is added to ``self.numTkns``.
            If the sentence has no children under index 0 (no roots), nothing
            else happens. Otherwise ``Parse.part_from_node`` is called for
            every token index from 1 upward, and then, for each root token:
            its tree-node id is recorded in ``self.rootTreeNodeIds``, the
            root count of its cluster is incremented, and ``createArgs`` is
            run to define the parent-child relationships. That call is
            recursive and traverses the dependency tree below the root.

            ai:      article identifier used to build tree-node ids.
            sj:      sentence index used to build tree-node ids.
            sent:    sentence object exposing ``get_tokens``, ``get_token``
                     and ``get_children``.
            verbose: forwarded to ``createArgs`` to enable its diagnostics.

            Always returns None.
        '''
        self.numTkns += len(sent.get_tokens()) - 1
        roots = sent.get_children(0)

        if roots is None or len(roots) == 0:
            return None

        for k in range(1, len(sent.get_tokens())):
            Parse.part_from_node(ai, sj, sent, k, sent.get_token(k))

        for _, idx in roots:
            sub_node_id = genTreeNodeID(ai, sj, idx)
            self.rootTreeNodeIds.add(sub_node_id)
            node_part = Part.getPartByRootNodeId(sub_node_id)

            if node_part is None:
                continue

            ncl = Clust.getClust(node_part.getClustIdx())
            ncl.incRootCnt()
            # Pass verbose by keyword: the fifth positional parameter of
            # createArgs is ``done``, not ``verbose``.
            self.createArgs(ai, sj, sent, idx, verbose=verbose)

        return None
Exemple #5
0
    def reparse(self, aid, si):
        '''
            Rebuild the parts of one sentence from scratch and greedily
            re-compose them.

            The existing Part of every non-ignored token is removed from
            ``Part.rootTreeNodeId_part`` and remembered; fresh parts are then
            created with ``Parse.part_from_node`` and assigned to their
            clusters. When the sentence has exactly one root, the argument
            tree is rebuilt with ``setArgs``, the best-scoring compose
            operation is applied repeatedly while its score is positive, and
            finally the cluster statistics are updated for the removed and
            the newly created parts.

            aid: key of the article in ``id_article``.
            si:  index of the sentence within ``article.sentences``.

            Always returns None.
        '''
        a = id_article[aid]
        sent = a.sentences[si]
        roots = sent.get_children(0)

        if roots is None or len(roots) == 0:
            return None

        # Detach the current parts of this sentence, keeping them so their
        # statistics can be removed afterwards.
        old_nid_part = {}

        for ni in range(len(sent.get_tokens())):
            if Parse.isIgnore(sent, ni):
                continue

            nid = genTreeNodeID(aid, si, ni)
            old_part = Part.getPartByRootNodeId(nid)
            del Part.rootTreeNodeId_part[nid]
            old_nid_part[nid] = old_part

        # Create a fresh part per token and put it into its cluster.
        nid_part = {}

        for ni in range(len(sent.get_tokens())):
            if Parse.isIgnore(sent, ni):
                continue

            part, clustIdx = Parse.part_from_node(aid, si, sent, ni)
            nid_part[genTreeNodeID(aid, si, ni)] = part
            part.setClust(clustIdx, clust_only=True)

        # NOTE(review): for sentences with several roots the old parts have
        # already been replaced above but no statistics are updated --
        # confirm this is intended.
        if len(roots) == 1:
            _, idx = next(iter(roots))
            root_part = Part.getPartByRootNodeId(genTreeNodeID(aid, si, idx))

            if root_part is not None:
                # Call through the instance: the bare name ``setArgs`` is not
                # resolvable from inside a method body.
                self.setArgs(aid, si, sent, idx)

            # Greedy hill-climbing: apply the single best compose operation
            # until none has a positive score.
            maxImp = 1

            while maxImp > 0:
                rp, ap = None, None
                maxImp = 0

                for prt in nid_part.values():
                    for arg in prt.getArguments().values():
                        score = self.scorer.scoreOpComposePart(prt, arg)

                        if score > maxImp:
                            maxImp = score
                            rp, ap = prt, arg

                if maxImp <= 0:
                    break

                self.executor.execComposePart(rp, ap)
                # NOTE(review): ``ap`` is a value of getArguments(); this
                # assumes such values expose getRelTreeRoot() -- confirm.
                del nid_part[ap.getRelTreeRoot().getId()]

            Clust.removePartAndUpdateStat(old_nid_part)
            Clust.updatePartStat(nid_part)

        return None
Exemple #6
0
    def createArgs(self,
                   art_id,
                   sent_id,
                   sent,
                   parent_id,
                   done=None,
                   verbose=False):
        '''
            Recursively turn the dependency children of token ``parent_id``
            into arguments of that token's Part.

            For each child token, the dependency relationship defines a Path
            and from it an argument type; an Argument linking parent and
            child is added to the parent Part, the child Part is pointed back
            at its parent, and the argument is placed in the first existing
            argument cluster of its type (one is created if none exists).
            The method then recurses on the child to process grand-children.

            Children without a Part, and children that already have a parent
            Part, are skipped. The second check also keeps the recursion from
            revisiting a token when the dependencies are malformed.

            art_id:    article identifier used to build tree-node ids.
            sent_id:   sentence index used to build tree-node ids.
            sent:      sentence object exposing ``get_children(index)``.
            parent_id: index of the token whose children are attached.
            done:      currently unused; kept for interface compatibility and
                       forwarded unchanged through the recursion.
            verbose:   when True, print a line for every skipped child.

            Always returns None.
        '''
        parent_node_id = genTreeNodeID(art_id, sent_id, parent_id)
        parent = TreeNode.getTreeNode(parent_node_id)
        parent_part = Part.getPartByRootNodeId(parent_node_id)
        parent_clust = Clust.getClust(parent_part.getClustIdx())
        children = sent.get_children(parent_id)

        if children is None:
            return None

        for relation, child_id in children:
            child_node_id = genTreeNodeID(art_id, sent_id, child_id)
            path = Path(relation)
            arg_type_id = path.getArgType()
            child_part = Part.getPartByRootNodeId(child_node_id)

            if child_part is None:
                if verbose:
                    print("Child node id {} has no part".format(
                        child_node_id))
                # Nothing to attach; without this the next check would raise
                # AttributeError on None.
                continue

            existing_parent = child_part.getParPart()

            if existing_parent is not None:
                if verbose:
                    print("Child node id {} already has "
                          "parent {}".format(
                              child_node_id,
                              existing_parent.getRelTreeRoot().getId()))
                continue

            arg = Argument(parent, path, child_part)
            arg_id = parent_part.addArgument(arg)
            child_part.setParent(parent_part, arg_id)

            arg_clust_ids = parent_clust.getArgClustIdxs(arg_type_id)

            if arg_clust_ids is None:
                arg_clust_id = parent_clust.createArgClust(arg_type_id)
            else:
                arg_clust_id = next(iter(arg_clust_ids))

            parent_part.setArgClust(arg_id, arg_clust_id)

            # Keep the caller's verbosity for the whole subtree.
            self.createArgs(art_id,
                            sent_id,
                            sent,
                            child_id,
                            done=done,
                            verbose=verbose)

        return None
Exemple #7
0
    def updateAgendaAfterExecMC(self, op, newClustIdx, verbose=False):
        '''
            Refresh the agenda after a merge-cluster operation was executed.

            Every operation still queued for the cluster that was merged away
            is removed, rewritten to refer to ``newClustIdx`` instead, and
            re-submitted for scoring (a merge whose two operands became the
            same cluster is dropped). Afterwards each part of the new cluster
            is paired with each part listed for the old cluster and handed to
            ``addAgendaAfterMergeClust``.

            op:          the executed operation; must be OP_MERGE_CLUST.
            newClustIdx: index of the cluster that survived the merge.
            verbose:     when True, print the number of part pairs considered.

            Always returns None.
        '''
        assert op._op == SearchOp.OP_MERGE_CLUST

        # The cluster that went away is whichever operand is not the survivor.
        if op._clustIdx2 == newClustIdx:
            oldClustIdx = op._clustIdx1
        else:
            oldClustIdx = op._clustIdx2

        def redirect(clust_idx):
            # Replace a reference to the old cluster by the new one.
            return newClustIdx if clust_idx == oldClustIdx else clust_idx

        while len(self._clustIdx_agenda[oldClustIdx]) > 0:
            pending = next(iter(self._clustIdx_agenda[oldClustIdx]))
            self.removeAgenda(pending)

            if pending._op == SearchOp.OP_MERGE_CLUST:
                first = redirect(pending._clustIdx1)
                second = redirect(pending._clustIdx2)

                if first == second:
                    # Both operands collapsed into one cluster: nothing left
                    # to merge.
                    continue

                pending._clustIdx1 = min(first, second)
                pending._clustIdx2 = max(first, second)
            elif pending._op == SearchOp.OP_COMPOSE:
                parent_idx = redirect(pending._parClustIdx)
                child_idx = redirect(pending._chdClustIdx)
                pending._parClustIdx = parent_idx
                pending._chdClustIdx = child_idx
            else:
                continue

            # The operation object is updated in place and queued again.
            pending.genString()
            self.addAgendaToScore(pending)

        del self._clustIdx_agenda[oldClustIdx]

        old_part_total = len(Part.getClustPartRootNodeIds()[oldClustIdx])
        new_part_total = len(Part.getClustPartRootNodeIds()[newClustIdx])

        if verbose:
            print("Updating agenda: {} possible operations.".format(
                new_part_total * old_part_total))

        for new_root_id in Part.getClustPartRootNodeIds()[newClustIdx]:
            new_part = Part.getPartByRootNodeId(new_root_id)

            for old_root_id in Part.getClustPartRootNodeIds()[oldClustIdx]:
                old_part = Part.getPartByRootNodeId(old_root_id)
                self.addAgendaAfterMergeClust(new_part, old_part)

        return None
Exemple #8
0
    def scoreMergeArgs(self, clust, arg1, arg2):
        '''
            Score merging argument clusters ``arg1`` and ``arg2`` of
            ``clust`` into one.

            The score starts at ``-ParseParams.priorMerge`` and is adjusted
            with xlogx terms comparing the separate clusters to the merged
            one: part membership counts, total argument counts, the
            per-part argument-number histogram, and (via
            ``Scorer.update_score_from_ds``) the argument-type and
            child-cluster count tables. Parts that belong to both argument
            clusters are counted once in the merged cluster, with their
            argument numbers added together.

            clust: cluster exposing ``_ttlCnt`` and ``_argClusts``.
            arg1:  index of the first argument cluster.
            arg2:  index of the second argument cluster.

            Returns the accumulated score.
            Raises ValueError if an argument-number histogram holds a zero
            count.
        '''
        score = 0
        score -= ParseParams.priorMerge

        total_part_cnt = clust._ttlCnt

        arg_clust1 = clust._argClusts[arg1]
        arg_clust2 = clust._argClusts[arg2]

        part_ids1 = arg_clust1._partRootTreeNodeIds
        part_ids2 = arg_clust2._partRootTreeNodeIds

        total_part_count1 = len(part_ids1)
        total_part_count2 = len(part_ids2)

        total_arg_count1 = arg_clust1._ttlArgCnt
        total_arg_count2 = arg_clust2._ttlArgCnt

        score -= (xlogx(total_part_cnt - total_part_count1)
                  + xlogx(total_part_cnt - total_part_count2))
        score += xlogx(total_part_cnt)
        score -= (2 * Scorer.updateScore(total_arg_count1, total_arg_count2))

        # Start the merged argument-number histogram as the sum of the two
        # existing ones, removing their individual contributions from the
        # score as we go.
        argNum_newCnt = dict()

        for dic in (arg_clust1._argNum_cnt, arg_clust2._argNum_cnt):
            for arg_num, count in dic.items():
                if count == 0:
                    raise ValueError(
                        "Zero arguments of type {}".format(arg_num))

                score -= xlogx(count)
                argNum_newCnt = inc_key(argNum_newCnt, arg_num, inc=count)

        # Parts present in both argument clusters. Computed as an explicit
        # intersection so the result does not depend on the iteration order
        # of the id containers and empty containers are handled; sorting
        # keeps the processing order ascending and deterministic.
        shared_ids = sorted(set(part_ids1).intersection(part_ids2))
        comb_part_cnt = total_part_count1 + total_part_count2 - len(shared_ids)

        for pid in shared_ids:
            shared_part = Part.getPartByRootNodeId(pid)
            cnt1 = len(shared_part._argClustIdx_argIdxs[arg1])
            cnt2 = len(shared_part._argClustIdx_argIdxs[arg2])

            # The part moves from its two separate histogram buckets into the
            # bucket for the combined number of arguments.
            argNum_newCnt = inc_key(argNum_newCnt, cnt1 + cnt2)
            argNum_newCnt = dec_key(argNum_newCnt, cnt1, remove=True)
            argNum_newCnt = dec_key(argNum_newCnt, cnt2, remove=True)

        score += xlogx(total_part_cnt - comb_part_cnt)

        for count in argNum_newCnt.values():
            score += xlogx(count)

        score += ((len(arg_clust1._argNum_cnt)
                   + len(arg_clust2._argNum_cnt)
                   - len(argNum_newCnt))
                  * ParseParams.priorNumParam)

        argtype_count1 = arg_clust1._argTypeIdx_cnt
        argtype_count2 = arg_clust2._argTypeIdx_cnt

        score = Scorer.update_score_from_ds(score, argtype_count1,
                                            argtype_count2)

        child_clust_count1 = arg_clust1._chdClustIdx_cnt
        child_clust_count2 = arg_clust2._chdClustIdx_cnt

        score = Scorer.update_score_from_ds(score, child_clust_count1,
                                            child_clust_count2)

        return score
Exemple #9
0
    def scoreOpCompose(self, rcidx, acidx):
        '''
            Score the operation of composing cluster ``rcidx`` (parent) with
            cluster ``acidx`` (child).

            Every (parent part, child part) root-node-id pair returned by
            ``Part.getPairPartRootNodeIds(rcidx, acidx)`` is visited. For
            each pair the method tallies, in local dictionaries, the counts
            the two clusters would have left after losing that pair (the
            dictionaries without "New" in their name, seeded from the
            clusters' current counts via ``dec_key(..., base=...)``) and the
            counts the pairs themselves contribute (the "New" dictionaries
            and ``raRelTypeIdx_newcnt``). The score is then assembled from
            xlogx terms over those counts plus/minus
            ``ParseParams.priorNumParam``.

            rcidx: index of the parent cluster.
            acidx: index of the child cluster.

            Returns -10000 when no parent/child pairs exist for the two
            clusters, otherwise the accumulated score.
        '''
        def update_score_from_dict(scr, d, orig_d):
            '''
                Return ``scr`` adjusted for every key of ``d``: subtract
                xlogx of the original count in ``orig_d``, then add xlogx of
                the new count, or ``ParseParams.priorNumParam`` when the new
                count is not positive.
            '''
            for key, cnt in d.items():
                origcnt = orig_d[key]
                # assert origcnt >= cnt
                scr -= xlogx(origcnt)

                if cnt > 0:
                    scr += xlogx(cnt)
                else:
                    scr += ParseParams.priorNumParam

            return scr

        # get parent and child root-node id numbers
        parChdNids = Part.getPairPartRootNodeIds(rcidx, acidx)

        if parChdNids is None:
            return -10000

        score = 0
        rcl = Clust.getClust(rcidx)
        acl = Clust.getClust(acidx)

        # Parent count, child count, and count of times they occur
        # together.
        rtc_new = rcl._ttlCnt
        atc_new = acl._ttlCnt
        ratc_new = 0
        raRootCnt = 0

        # Keyed by (parent-of-parent cluster index, argument cluster index).
        parArg_cnt = dict()

        # Naming used below: the "r" prefix refers to the parent cluster
        # (rcidx), "a" to the child cluster (acidx), "ra" to the pair.
        rRelTypeIdx_newcnt = dict()
        aRelTypeIdx_newcnt = dict()
        raRelTypeIdx_newcnt = dict()

        rArgClustIdx_argNum_cnt = dict()
        aArgClustIdx_argNum_cnt = dict()
        rNewArgClustIdx_argNum_cnt = dict()
        aNewArgClustIdx_argNum_cnt = dict()

        rArgClustIdx_argTypeIdx_cnt = dict()
        aArgClustIdx_argTypeIdx_cnt = dict()
        rNewArgClustIdx_argTypeIdx_cnt = dict()
        aNewArgClustIdx_argTypeIdx_cnt = dict()

        rArgClustIdx_chdClustIdx_cnt = dict()
        aArgClustIdx_chdClustIdx_cnt = dict()
        rNewArgClustIdx_chdClustIdx_cnt = dict()
        aNewArgClustIdx_chdClustIdx_cnt = dict()

        rArgClustIdx_partCnt = dict()
        aArgClustIdx_partCnt = dict()
        rNewArgClustIdx_partCnt = dict()
        aNewArgClustIdx_partCnt = dict()

        rArgClustIdx_argCnt = dict()
        aArgClustIdx_argCnt = dict()
        rNewArgClustIdx_argCnt = dict()
        aNewArgClustIdx_argCnt = dict()

        # For each parent-child pair:
        for pcnid in parChdNids:
            pp, cp = Part.getPartByRootNodeId(
                pcnid[0]), Part.getPartByRootNodeId(pcnid[1])

            # One part leaves each cluster; one combined part appears.
            rtc_new -= 1
            atc_new -= 1
            ratc_new += 1

            rrt = pp.getRelTypeIdx()
            art = cp.getRelTypeIdx()
            # Argument cluster (on the parent) that holds the child part.
            raArgClustidx = pp.getArgClust(cp._parArgIdx)

            # Decrement individual relType counts and increment the combined
            # relType count for this pair
            rRelTypeIdx_newcnt = dec_key(rRelTypeIdx_newcnt,
                                         rrt,
                                         base=rcl._relTypeIdx_cnt[rrt])

            aRelTypeIdx_newcnt = dec_key(aRelTypeIdx_newcnt,
                                         art,
                                         base=acl._relTypeIdx_cnt[art])

            raRelTypeIdx_newcnt = inc_key(raRelTypeIdx_newcnt, (rrt, art))

            pp_par = pp.getParPart()

            # If the parent has a parent, increment the parArg count, otherwise
            # increment the root count.
            if pp_par is not None:
                ai = pp.getParArgIdx()
                ppi = pp_par.getClustIdx()
                aci = pp_par.getArgClust(ai)

                parArg_cnt = inc_key(parArg_cnt, (ppi, aci))
            else:
                raRootCnt += 1

            # For each argClust on the parent part, decrement the old parent
            # part count and argClust count, and increment the new ones. The
            # trick is don't copy/increment the counts for argument shared by
            # this pair.
            for arg_ci in pp._argClustIdx_argIdxs:
                an = len(pp._argClustIdx_argIdxs[arg_ci])
                ac = rcl._argClusts[arg_ci]

                rArgClustIdx_partCnt = dec_key(rArgClustIdx_partCnt,
                                               arg_ci,
                                               base=len(
                                                   ac._partRootTreeNodeIds))

                if arg_ci not in rArgClustIdx_argNum_cnt:
                    rArgClustIdx_argNum_cnt[arg_ci] = {}

                rArgClustIdx_argNum_cnt[arg_ci] = \
                    dec_key(rArgClustIdx_argNum_cnt[arg_ci],
                            an,
                            base=ac._argNum_cnt[an])

                newArgNum = an

                # The argument pointing at the child part is not carried over.
                if arg_ci == raArgClustidx:
                    newArgNum -= 1

                # Nothing remains in this argument cluster for this part.
                if newArgNum == 0:
                    continue

                if arg_ci not in rNewArgClustIdx_argNum_cnt:
                    rNewArgClustIdx_argNum_cnt[arg_ci] = {}

                rNewArgClustIdx_argNum_cnt[arg_ci] = \
                    inc_key(rNewArgClustIdx_argNum_cnt[arg_ci], newArgNum)

                rNewArgClustIdx_partCnt = inc_key(rNewArgClustIdx_partCnt,
                                                  arg_ci)

            # Same as above, but for child part, and we don't skip anything.
            for arg_ci in cp._argClustIdx_argIdxs:
                an = len(cp._argClustIdx_argIdxs[arg_ci])
                ac = acl._argClusts[arg_ci]

                aArgClustIdx_partCnt = dec_key(aArgClustIdx_partCnt,
                                               arg_ci,
                                               base=len(
                                                   ac._partRootTreeNodeIds))

                if arg_ci not in aArgClustIdx_argNum_cnt:
                    aArgClustIdx_argNum_cnt[arg_ci] = {}

                aArgClustIdx_argNum_cnt[arg_ci] = \
                    dec_key(aArgClustIdx_argNum_cnt[arg_ci],
                            an,
                            base=ac._argNum_cnt[an])

                if arg_ci not in aNewArgClustIdx_argNum_cnt:
                    aNewArgClustIdx_argNum_cnt[arg_ci] = {}

                aNewArgClustIdx_argNum_cnt[arg_ci] = \
                    inc_key(aNewArgClustIdx_argNum_cnt[arg_ci], an)

                aNewArgClustIdx_partCnt = inc_key(aNewArgClustIdx_partCnt,
                                                  arg_ci)

            args = pp.getArguments()

            # For all the parent's arguments
            for ai, arg in args.items():
                arg_part = arg._argPart
                child_clust_id = arg_part._clustIdx
                aci = pp.getArgClust(ai)
                ac = rcl._argClusts[aci]
                ati = arg._path.getArgType()

                # Drop the old arguments

                rArgClustIdx_argCnt = dec_key(rArgClustIdx_argCnt,
                                              aci,
                                              base=ac._ttlArgCnt)

                if aci not in rArgClustIdx_argTypeIdx_cnt:
                    rArgClustIdx_argTypeIdx_cnt[aci] = {}

                rArgClustIdx_argTypeIdx_cnt[aci] = \
                    dec_key(rArgClustIdx_argTypeIdx_cnt[aci],
                            ati,
                            base=ac._argTypeIdx_cnt[ati])

                if aci not in rArgClustIdx_chdClustIdx_cnt:
                    rArgClustIdx_chdClustIdx_cnt[aci] = {}

                rArgClustIdx_chdClustIdx_cnt[aci] = \
                    dec_key(rArgClustIdx_chdClustIdx_cnt[aci],
                            child_clust_id,
                            base=ac._chdClustIdx_cnt[child_clust_id])

                # Add the new arguments, except for the child part we're possibly
                # absorbing

                if arg_part.getRelTreeRoot().getId() != cp.getRelTreeRoot(
                ).getId():
                    rNewArgClustIdx_argCnt = inc_key(rNewArgClustIdx_argCnt,
                                                     aci)

                    if aci not in rNewArgClustIdx_argTypeIdx_cnt:
                        rNewArgClustIdx_argTypeIdx_cnt[aci] = {}

                    rNewArgClustIdx_argTypeIdx_cnt[aci] = \
                        inc_key(rNewArgClustIdx_argTypeIdx_cnt[aci], ati)

                    if aci not in rNewArgClustIdx_chdClustIdx_cnt:
                        rNewArgClustIdx_chdClustIdx_cnt[aci] = {}

                    rNewArgClustIdx_chdClustIdx_cnt[aci] = \
                        inc_key(rNewArgClustIdx_chdClustIdx_cnt[aci], child_clust_id)

            args = cp.getArguments()

            # For all the child's arguments (none of them is skipped)
            for ai, arg in args.items():
                ap = arg._argPart
                cci = ap._clustIdx
                aci = cp.getArgClust(ai)
                ac = acl._argClusts[aci]
                ati = arg._path.getArgType()

                # Drop the old arguments

                aArgClustIdx_argCnt = dec_key(aArgClustIdx_argCnt,
                                              aci,
                                              base=ac._ttlArgCnt)

                if aci not in aArgClustIdx_argTypeIdx_cnt:
                    aArgClustIdx_argTypeIdx_cnt[aci] = {}

                aArgClustIdx_argTypeIdx_cnt[aci] = \
                    dec_key(aArgClustIdx_argTypeIdx_cnt[aci],
                            ati,
                            base=ac._argTypeIdx_cnt[ati])

                if aci not in aArgClustIdx_chdClustIdx_cnt:
                    aArgClustIdx_chdClustIdx_cnt[aci] = dict()

                aArgClustIdx_chdClustIdx_cnt[aci] = \
                    dec_key(aArgClustIdx_chdClustIdx_cnt[aci],
                            cci,
                            base=ac._chdClustIdx_cnt[cci])

                # Add the new arguments

                aNewArgClustIdx_argCnt = inc_key(aNewArgClustIdx_argCnt, aci)

                if aci not in aNewArgClustIdx_argTypeIdx_cnt:
                    aNewArgClustIdx_argTypeIdx_cnt[aci] = {}

                aNewArgClustIdx_argTypeIdx_cnt[aci] = \
                    inc_key(aNewArgClustIdx_argTypeIdx_cnt[aci], ati)

                if aci not in aNewArgClustIdx_chdClustIdx_cnt:
                    aNewArgClustIdx_chdClustIdx_cnt[aci] = {}

                aNewArgClustIdx_chdClustIdx_cnt[aci] = \
                    inc_key(aNewArgClustIdx_chdClustIdx_cnt[aci], cci)

        # Root-count term: only applied when some, but not all, of the parent
        # cluster's root occurrences are among the visited pairs.
        if raRootCnt > 0:
            origRootCnt = Clust.clustIdx_rootCnt[rcidx]

            if origRootCnt > raRootCnt:
                score +=  xlogx(raRootCnt) \
                        + xlogx(origRootCnt - raRootCnt) \
                        - xlogx(origRootCnt)
                score -= ParseParams.priorNumParam

        # Relation-type terms for the parent cluster, with the xlogx of its
        # original (denomor) and reduced (denomnr) part totals.
        denomor = xlogx(rcl._ttlCnt)
        denomnr = xlogx(rtc_new)

        score = update_score_from_dict(score, rRelTypeIdx_newcnt,
                                       rcl._relTypeIdx_cnt)

        score += denomor
        score -= denomnr

        # Same for the child cluster (denomoa / denomna).
        denomoa = xlogx(acl._ttlCnt)
        denomna = xlogx(atc_new)

        score = update_score_from_dict(score, aRelTypeIdx_newcnt,
                                       acl._relTypeIdx_cnt)

        score += denomoa
        score -= denomna

        # Relation-type terms for the combined (parent, child) pairs.
        for cnt in raRelTypeIdx_newcnt.values():
            score -= ParseParams.priorNumParam
            score += xlogx(cnt)

        denomra = xlogx(ratc_new)
        score -= denomra

        # Terms for the argument clusters (on the parents' own parents) that
        # currently point at the parent cluster.
        for pi, cnt in parArg_cnt.items():
            pc = Clust.getClust(pi[0])
            ac = pc._argClusts[pi[1]]
            origcnt = ac._chdClustIdx_cnt[rcidx]

            # All occurrences are affected: no split, no extra term.
            if cnt == origcnt:
                continue

            score -= ParseParams.priorNumParam
            score += xlogx(cnt) + xlogx(origcnt - cnt) - xlogx(origcnt)

        # Argument clusters of the parent cluster: remove their original
        # contribution and add the contribution of the reduced counts.
        for aci, ac in rcl._argClusts.items():
            origPartCnt = len(ac._partRootTreeNodeIds)
            score -= (xlogx(rcl._ttlCnt - origPartCnt) - denomor)

            # Argument cluster untouched by any visited pair: only the
            # cluster total changed.
            if aci not in rArgClustIdx_partCnt:
                score += (xlogx(rtc_new - origPartCnt) - denomnr)
                continue

            if rArgClustIdx_partCnt[aci] > 0:
                score += (xlogx(rtc_new - rArgClustIdx_partCnt[aci]) - denomnr)

            score = update_score_from_dict(score, rArgClustIdx_argNum_cnt[aci],
                                           ac._argNum_cnt)

            score -= 2 * (xlogx(rArgClustIdx_argCnt[aci]) -
                          xlogx(ac._ttlArgCnt))

            score = update_score_from_dict(score,
                                           rArgClustIdx_argTypeIdx_cnt[aci],
                                           ac._argTypeIdx_cnt)

            score = update_score_from_dict(score,
                                           rArgClustIdx_chdClustIdx_cnt[aci],
                                           ac._chdClustIdx_cnt)

        # line 570 in Scorer.java

        # Argument clusters of the child cluster: same treatment as above.
        for aci, ac in acl._argClusts.items():
            origPartCnt = len(ac._partRootTreeNodeIds)
            score -= (xlogx(acl._ttlCnt - origPartCnt) - denomoa)

            if aci not in aArgClustIdx_partCnt:
                score += (xlogx(atc_new - origPartCnt) - denomna)
                continue

            if aArgClustIdx_partCnt[aci] > 0:
                score += (xlogx(atc_new - aArgClustIdx_partCnt[aci]) - denomna)

            score = update_score_from_dict(score, aArgClustIdx_argNum_cnt[aci],
                                           ac._argNum_cnt)

            score -= 2 * (xlogx(aArgClustIdx_argCnt[aci]) -
                          xlogx(ac._ttlArgCnt))

            score = update_score_from_dict(score,
                                           aArgClustIdx_argTypeIdx_cnt[aci],
                                           ac._argTypeIdx_cnt)
            score = update_score_from_dict(score,
                                           aArgClustIdx_chdClustIdx_cnt[aci],
                                           ac._chdClustIdx_cnt)

        # Contributions of the "New" part counts and argument-number
        # histograms, first from the parent side, then from the child side.
        for ds in [(rNewArgClustIdx_partCnt, rNewArgClustIdx_argNum_cnt),
                   (aNewArgClustIdx_partCnt, aNewArgClustIdx_argNum_cnt)]:
            for aci, partCnt in ds[0].items():
                score += xlogx(ratc_new - partCnt) - denomra

                for idx, cnt in ds[1][aci].items():
                    score += xlogx(cnt)
                    score -= ParseParams.priorNumParam

        # Contributions of the "New" argument totals, argument-type counts
        # and child-cluster counts, again parent side then child side.
        for ds in [(rNewArgClustIdx_argCnt, rNewArgClustIdx_argTypeIdx_cnt,
                    rNewArgClustIdx_chdClustIdx_cnt),
                   (aNewArgClustIdx_argCnt, aNewArgClustIdx_argTypeIdx_cnt,
                    aNewArgClustIdx_chdClustIdx_cnt)]:
            for aci, argCnt in ds[0].items():
                score -= 2 * xlogx(argCnt)

                for idx, cnt in ds[1][aci].items():
                    score += xlogx(cnt)
                    score -= ParseParams.priorNumParam

                for idx, cnt in ds[2][aci].items():
                    score += xlogx(cnt)
                    score -= ParseParams.priorNumParam

        return score
Exemple #10
0
    def execCompose(self, op):
        """Execute a compose operation, folding child parts into their parents.

        For every (parent, child) pair of part root-node ids registered in
        ``Part.pairClustIdxs_pairPartRootNodeIds`` under the key
        ``(op._parClustIdx, op._chdClustIdx)``:

        * the child's relation-tree root is attached beneath the parent's
          root, using the dependency of the argument that linked them;
        * the relation type of the enlarged parent tree is looked up and the
          parent part is moved to the cluster holding that relation type
          (the cluster is created when none exists yet);
        * the child's arguments are re-attached to the parent;
        * the child part is destroyed.

        The registry entry for the cluster pair is deleted afterwards.

        :param op: operation object exposing ``_parClustIdx`` and
            ``_chdClustIdx``.
        :return: id of the cluster that received the composed parts, or -1
            when either input cluster does not exist or no pair was processed.
        :raises Exception: if more than one cluster is registered for the
            composed relation type.
        """

        def pick_arg_clust(clust, arg_type):
            # Reuse any existing argument cluster of this type in `clust`;
            # create one when the type is unknown or its index set is empty.
            idxs_by_type = clust._argTypeIdx_argClustIdxs
            if arg_type in idxs_by_type and len(idxs_by_type[arg_type]) > 0:
                return next(iter(idxs_by_type[arg_type]))
            return clust.createArgClust(arg_type)

        parClustIdx = op._parClustIdx
        chdClustIdx = op._chdClustIdx
        new_clust_id = -1

        # Nothing to do when either cluster no longer exists.
        if (Clust.getClust(parClustIdx) is None
                or Clust.getClust(chdClustIdx) is None):
            return -1

        new_clust = None
        parent_child_pair = (parClustIdx, chdClustIdx)

        # Iterate over a copy of the registered pairs.
        # NOTE(review): presumably the Part methods called below can modify
        # the registry entry while we loop - confirm.
        part_ids = set(
            Part.pairClustIdxs_pairPartRootNodeIds[parent_child_pair])

        # Root-node ids of child parts destroyed so far; a set keeps the
        # per-pair membership checks constant-time.
        deleted_parts = set()

        for parent_id, child_id in part_ids:
            # Skip pairs that involve a part already folded into a parent.
            if parent_id in deleted_parts or child_id in deleted_parts:
                continue

            parent_part = Part.getPartByRootNodeId(parent_id)
            child_part = Part.getPartByRootNodeId(child_id)
            dep = parent_part.getArguments()[
                child_part._parArgIdx]._path.getDep()
            parent_part._relTreeRoot.addChild(dep, child_part._relTreeRoot)
            nrti = RelType.getRelType(parent_part._relTreeRoot)

            if new_clust is None:
                # First processed pair: resolve the target cluster once.
                rel_clusts = Clust.getClustsWithRelType(nrti)
                if rel_clusts is None or len(rel_clusts) == 0:
                    # An empty collection is handled like "no cluster yet"
                    # rather than letting next() raise StopIteration.
                    new_clust = Clust.getClust(Clust.createClust(nrti))
                elif len(rel_clusts) > 1:
                    raise Exception(
                        "Expected at most one cluster for relation type {}, "
                        "found {}".format(nrti, len(rel_clusts)))
                else:
                    new_clust = Clust.getClust(next(iter(rel_clusts)))

                new_clust_id = new_clust.getId()

            # The child stops being an argument of the parent.
            parent_part.removeArgument(child_part._parArgIdx)

            if parent_part.getClustIdx() != new_clust_id:
                # Detach every remaining argument before switching clusters.
                for argIdx in parent_part.getArguments():
                    parent_part.unsetArgClust(argIdx)
                    arg = parent_part.getArgument(argIdx)
                    arg._argPart.unsetParent()

                parent_part.changeClust(new_clust_id, nrti)

                # Re-attach the arguments against the new cluster.
                for argIdx, arg in parent_part.getArguments().items():
                    arg_clust_id = pick_arg_clust(
                        new_clust, arg._path.getArgType())
                    arg._argPart.setParent(parent_part, argIdx)
                    parent_part.setArgClust(argIdx, arg_clust_id)

                parent_part.setRelTypeIdx(nrti)
            else:
                # Already in the target cluster: only the relation type
                # needs refreshing.
                parent_part.unsetRelTypeIdx()
                parent_part.setRelTypeIdx(nrti)

            #
            # Connect the child part's arguments directly to the parent part now
            #

            for argIdx, arg in child_part.getArguments().items():
                child_part.unsetArgClust(argIdx)
                arg_clust_id = pick_arg_clust(
                    new_clust, arg._path.getArgType())

                newArgIdx = parent_part.addArgument(arg)
                arg._argPart.setParent(parent_part, newArgIdx)
                parent_part.setArgClust(newArgIdx, arg_clust_id)

            #
            # Remove the old child part
            #

            deleted_parts.add(child_part.getRelTreeRoot().getId())
            child_part.destroy()

        # NOTE(review): Part.clustIdx_pairClustIdxs is not updated here (the
        # removal was commented out in the original) - confirm intentional.
        del Part.pairClustIdxs_pairPartRootNodeIds[parent_child_pair]

        return new_clust_id
Exemple #11
0
    def execMC(self, op):
        """Merge the two clusters referenced by ``op`` into one.

        The cluster with fewer argument clusters is merged into the other
        (on a tie the cluster named by ``op._clustIdx1`` survives). Argument
        clusters are aligned via ``self._parse.scorer.scoreMCForAlign``; any
        argument cluster of the merged-away cluster left unaligned is mapped
        to an argument cluster of the survivor chosen by its first argument
        type. Every part of the merged-away cluster is then remapped to the
        survivor and the merged-away cluster is removed.

        :param op: operation object exposing ``_clustIdx1`` and
            ``_clustIdx2``.
        :return: id of the surviving cluster, or -1 when either cluster does
            not exist.
        """
        survivor = Clust.getClust(op._clustIdx1)
        absorbed = Clust.getClust(op._clustIdx2)

        if survivor is None or absorbed is None:
            return -1

        # Keep the cluster with more argument clusters as the merge target.
        if len(survivor._argClusts) < len(absorbed._argClusts):
            survivor, absorbed = absorbed, survivor

        # Score-based alignment: absorbed arg-cluster id -> survivor
        # arg-cluster id.
        _, remap = self._parse.scorer.scoreMCForAlign(
            survivor, absorbed, dict())

        # Give every unaligned argument cluster a destination as well.
        for absorbed_aci in absorbed._argClusts:
            if absorbed_aci in remap:
                continue

            type_counts = absorbed._argClusts[absorbed_aci]._argTypeIdx_cnt

            # Only the first argument type decides the destination.
            for arg_type in type_counts:
                candidates = survivor.getArgClustIdxs(arg_type)
                remap[absorbed_aci] = (
                    survivor.createArgClust(arg_type)
                    if candidates is None
                    else next(iter(candidates)))
                break

        # Move the parts over, iterating a copy of the id collection.
        for root_id in set(Part.getPartRootNodeIds(absorbed.getId())):
            part = Part.getPartByRootNodeId(root_id)

            # Detach the argument parts, remap, then attach them again.
            for argument in part.getArguments().values():
                argument._argPart.unsetParent()

            part.changeClustRemap(survivor.getId(), remap)

            for arg_idx, argument in part.getArguments().items():
                argument._argPart.setParent(part, arg_idx)

        Clust.removeClust(absorbed)

        return survivor.getId()