Beispiel #1
0
  def buildHypothesisesForArea(self, sense, hypStacks, area, areaTags, phraseBorders, phraseDerivations,
                               intrinsicCoverageMap, phraseClosedTokens, intrinsicCoverageSourceMap):
    """
    Build hypothesis stacks for every run of consecutive base phrases in the given area.

    Base phrases are combined CYK-style: shorter runs are decoded before longer ones,
    so hypotheses of sub-areas are already in hypStacks when a larger run needs them.
    Results are written into hypStacks and areaTags in place.

    @param sense: passed through to buildSourceString, Reconstructor and taggingFunction
    @param hypStacks: map area -> list of hypotheses, updated in place
    @param area: (low token id, high token id)
    @param areaTags: map area -> tag, updated in place
    @param phraseBorders: map node -> (left border, right border)
    @param phraseDerivations: list (child node, yielded node)
    @param intrinsicCoverageMap: passed to getSubTreeDistance
    @param phraseClosedTokens: passed to generateSources
    @param intrinsicCoverageSourceMap: map area -> intrinsic source covering that area
    @return: None
    """
    # Function-scope import: the file's import block is not visible from this method.
    import logging

    basePhrases = self.enumerateBasePhrasesForArea(area, phraseBorders, phraseDerivations)
    # Assume all base phrases fulfill the area

    # Decode in phrase CYK
    for phraseCount in range(1, len(basePhrases) + 1):
      for beginPosition in range(0, len(basePhrases) - phraseCount + 1):
        phraseGroup = basePhrases[beginPosition : beginPosition + phraseCount]
        # From here on `area` is the span of the current phrase group, not the argument.
        area = (phraseBorders[phraseGroup[0]][0], phraseBorders[phraseGroup[-1]][1])
        generatedSources = self.generateSources(phraseGroup, phraseClosedTokens, phraseBorders)
        finalHyps = []
        intrinsicSourceString = None
        triedIntrinsicSource = False
        hasIntrinsicSource = area in intrinsicCoverageSourceMap
        if hasIntrinsicSource:
          # The intrinsic source is always tried, in addition to the generated ones.
          intrinsicSource = intrinsicCoverageSourceMap[area]
          intrinsicSourceString = self.buildSourceString(sense, areaTags, intrinsicSource)
          generatedSources.append(intrinsicSource)
        # Fetch rules and decode
        # For all sources try exactly matching
        for source in generatedSources:
          # Tuples inside a source are areas whose hypotheses must already exist.
          dependentAreas = [p for p in source if isinstance(p, tuple)]
          if any(dependentArea not in hypStacks for dependentArea in dependentAreas):
            continue
          # Fetch rule
          sourceString = self.buildSourceString(sense, areaTags, source)
          if not sourceString:
            continue
          isIntrinsic = sourceString == intrinsicSourceString
          if isIntrinsic:
            # The intrinsic source may also appear among the generated sources;
            # decode it only once.
            if triedIntrinsicSource:
              continue
            triedIntrinsicSource = True
          # Fetch exactly matched rule
          exactlyMatchedRules = self.rulefetcher.findRulesBySourceString(sourceString, dependentAreas)
          # Only the intrinsic source is allowed to proceed without matched rules.
          if not exactlyMatchedRules and not isIntrinsic:
            continue
          subTreeDistance = self.getSubTreeDistance(intrinsicCoverageMap, area, dependentAreas)
          if exactlyMatchedRules:
            # Got some rules, then using normal cube pruning to get hypothesis
            pruner = CubePruner(self.model, area, sourceString, subTreeDistance, exactlyMatchedRules,
                                dependentAreas, hypStacks)
            hyps = pruner.prune()
          else:
            # In this case, here we got an intrinsic rule covering the same area in the parse tree.
            # We should not allow this rule to be kicked off, so use reconstruction or depraved glue rule.
            # Sources longer than 12 items are flagged for depraved reconstruction.
            depravedReconstruction = len(source) > 12
            reconstructor = Reconstructor(self.ruletable, self.model, sense, area, sourceString, subTreeDistance,
                                          hypStacks, source, areaTags, dependentAreas, depravedReconstruction)
            hyps = reconstructor.parse()
          finalHyps.extend(hyps)
        if not finalHyps and hasIntrinsicSource:
          # Previously a pdb breakpoint; report the condition instead of halting the decoder.
          logging.getLogger(__name__).warning(
            "No hypothesis built for intrinsic area %s", area)
        if finalHyps:
          # NOTE(review): hypotheses of different sources are concatenated and truncated
          # without re-sorting -- confirm the beam does not need a global ordering.
          hypStacks[area] = finalHyps[:setting.size_beam]
          areaTags[area] = self.taggingFunction(sense, phraseGroup)
Beispiel #2
0
  def translateNBestOLD(self,data_tree,data_dep):
    """
    Translate and return a N-best list
    @type data_tag: string
    @type data_dep: string
    @rtype: list of GentileHypothesis
    """
    # first, we need get the tree of input
    self.model.cacheMode = False
    setting.load(["nbest", "head_phrases_limit"])
    tree = SenseTree(data_tree,data_dep)
    tree.rebuildTopNode()
    tree.appendXToTree()
    tree.upMergeAllConjNodes()
    tree.rebuildCommaNodes()
    tree.convertTags()
    tree.separateContiniousNonTerminals()
    # tree.mergeContinuousNTs()
    fetcher = self.prepareRulesForTranslation(tree)
    # build lexical hypothesis stack
    # { id->[lexical hyp,] }
    # stack_lex = self.buildLexicalStack(fetcher)
    # { id->[lexical hyp,] }
    hypStacks = {}
    # for each fragment ( head node is not leaf ) at bottom-up style
    # use corresponding rules and basic hypothesis(lex or normal) to build normal hyp for this fragment
    tree.buildLevelMap()
    cur_level = tree.getMaxLevel()
    # A dirty trick: save current sense tree to cross-module global variable.
    __builtin__.currentSenseTree = tree
    # start pruning
    self.model.cacheMode = True
    while cur_level > 0:
      # [head id,]
      nodes_cur_level = tree.getNodesByLevel(cur_level)
      if cur_level == 1:
        self.model.smode = True
      else:
        self.model.smode = False
      for node in nodes_cur_level:
        if node not in fetcher.joints:
          # only prune for joint nodes
          continue
        # get rules
        rules, sitesInvolved = fetcher.mapJointRules[node]
        # okay available could in random order
        # we dont need sort it
        if not rules:
          # No rules found, force to use CYK.
          rc = Reconstructor(self.ruletable, self.model,
                             tree, hypStacks, node)
          hyps = rc.parse()
        else:
          # Rules found then cube prunning.
          # sort rules
          rules = self.model.sortRules(rules)
          # now run the cube pruning and get normal hypothesises for current node
          hyps = separately_prune(self.model, node, rules, sitesInvolved, hypStacks)
        hypStacks[node] = hyps
        self.model.clearCache()
      # end of current node
      cur_level -= 1

    rootNode = tree.getRootNode()
    if rootNode not in hypStacks or len(hypStacks[rootNode])==0:
      # failed
      print "[GentileDecoder]","Translation Failed!!!"
      return []

    # end building normal hypothesis stack
    # hypStacks[rootNode][0].trace()

    return hypStacks[rootNode][:setting.nbest]
Beispiel #3
0
  def buildHypothesisesForArea(self, sense, hypStacks, area, areaTags, phraseBorders, phraseDerivations, phraseCoverages, phraseClosedTokens, intrinsicRuleCoverages):
    """
    Fill hypStacks for every run of consecutive base phrases inside the given area.

    Runs are visited CYK-style, shortest first. For each run every generated source
    is tried; a source that produces hypotheses stores them under the run's area in
    hypStacks and sets areaTags for that area to None.

    @param sense: passed through to buildSourceString, buildDepravedMatchingRules and Reconstructor
    @param hypStacks: map area -> hypotheses, updated in place
    @param area: (low token id, high token id)
    @param areaTags: map area -> tag, updated in place
    @param phraseBorders: map node -> (left border, right border)
    @param phraseDerivations: list (child node, yielded node)
    @param phraseCoverages: map node -> (left coverage end, right coverage end); not referenced in this method
    @param phraseClosedTokens: passed to generateSourcesWithPhrase
    @param intrinsicRuleCoverages: map source string -> coverage
    @return: None
    """
    basePhrases = self.enumerateBasePhrasesForArea(area, phraseBorders, phraseDerivations)
    phraseTotal = len(basePhrases)

    # Phrase-level CYK: grow the run length, slide the start position.
    for runLength in range(1, phraseTotal + 1):
      for start in range(phraseTotal - runLength + 1):
        phraseGroup = basePhrases[start : start + runLength]
        leftBorder = phraseBorders[phraseGroup[0]][0]
        rightBorder = phraseBorders[phraseGroup[-1]][1]
        # From here on `area` is the span of the current run, not the argument.
        area = (leftBorder, rightBorder)
        candidates = self.generateSourcesWithPhrase(phraseGroup, phraseClosedTokens, phraseBorders)

        for source, _phrases in candidates:
          # Tuples in a source are areas whose hypotheses must already be available.
          dependentAreas = [item for item in source if isinstance(item, tuple)]
          if any(dep not in hypStacks for dep in dependentAreas):
            continue

          sourceString = self.buildSourceString(sense, areaTags, source)
          if not sourceString:
            continue

          rules = self.rulefetcher.findRulesBySourceString(sourceString, dependentAreas)
          if not rules:
            # Without matched rules only an intrinsic source covering exactly
            # this area is kept; everything else is dropped.
            notIntrinsic = (sourceString not in intrinsicRuleCoverages
                            or intrinsicRuleCoverages[sourceString] != area)
            if notIntrinsic:
              continue
            # Sources longer than 12 items get depraved matching rules instead.
            if len(source) > 12:
              rules = self.rulefetcher.buildDepravedMatchingRules(sense, source)

          if rules:
            # Rules are available: ordinary cube pruning.
            hyps = separately_prune(self.model, rules, hypStacks)
          else:
            # Still no rules: fall back to reconstruction.
            hyps = Reconstructor(self.ruletable, self.model, sense, hypStacks,
                                 source, areaTags, dependentAreas).parse()

          if not hyps:
            continue
          hypStacks[area] = hyps
          areaTags[area] = None