Esempio n. 1
0
def parallelInversion(transducersAndOutputs, alphabet=None):
    """Jointly invert several transducers against their observed outputs.

    For every ``(y, t)`` pair, ``y`` is composed with ``invert(t)`` and the
    result is projected with ``project(True)``. The resulting machines are
    intersected. When ``alphabet`` is given, the intersection is additionally
    combined (``*``) with the closure of the union of the alphabet entries.
    Every arc whose label is not 0 gets its weight set to 1, and the shortest
    path of the final machine is returned as a string.

    Args:
        transducersAndOutputs: iterable of ``(y, t)`` pairs, where ``t`` is a
            transducer and ``y`` the output it is inverted against.
        alphabet: optional iterable of machines to union and close; ``None``
            skips this step.

    Returns:
        The stringified shortest path, or ``None`` if any step raised.
    """
    try:
        candidates = [
            sandwich.compose(y, sandwich.invert(t)).project(True)
            for y, t in transducersAndOutputs
        ]
        a = reduce(sandwich.intersect, candidates)
        # Identity test: ``!=`` could dispatch to an overloaded operator.
        if alphabet is not None:
            lm = sandwich.union(*alphabet).closure()
            a = a * lm
        a.topsort()
        for s in a.states():
            iterator = a.mutable_arcs(s)
            while not iterator.done():
                value = iterator.value()
                # Explicit check instead of ``assert`` so that it still runs
                # under ``python -O``; the failure is reported as ``None``
                # by the handler below, exactly as before.
                if value.olabel != value.ilabel:
                    raise AssertionError("input and output labels differ")
                if value.olabel != 0:
                    value.weight = 1
                    iterator.set_value(value)
                iterator.next()
        return sandwich.shortestpath(a).stringify()
    except Exception:
        # Best-effort API: any failure yields None. ``Exception`` (not a bare
        # ``except``) lets KeyboardInterrupt/SystemExit propagate.
        return None
Esempio n. 2
0
 def rewrite(self, i: str) -> str:
     """Rewrite ``i`` with ``self.rule`` and return the shortest-path string.

     The input is compiled to an acceptor using ``self.token_type`` and
     composed with ``self.rule``. If the composition has no start state, the
     failure is logged and the literal ``"<composition failure>"`` is returned.
     """
     acceptor = pynini.acceptor(i, token_type=self.token_type)
     lattice = acceptor @ self.rule
     if lattice.start() != pynini.NO_STATE_ID:
         best = pynini.shortestpath(lattice)
         return best.stringify(token_type=self.token_type)
     logging.error("Composition failure: %s", i)
     return "<composition failure>"
Esempio n. 3
0
    def parse(self):
        """Compute the best path for ``self.input`` and extract its rules.

        Builds an FSA from ``self.input`` (using the mutable input symbols of
        ``self.fst``), combines it with ``self.fst`` via ``*``, stores the
        topologically sorted shortest path in ``self._best`` and the result
        of ``retrieve_rules`` on it in ``self._polish_rules``.
        """
        symbols = self.fst.mutable_input_symbols()
        input_fsa = fsa_from_list_of_symbols(self.input, symbols)
        self._best = shortestpath(input_fsa * self.fst)

        # Sort before handing the path to retrieve_rules.
        self._best.topsort()
        self._polish_rules = retrieve_rules(self._best)
Esempio n. 4
0
def decode_lattice(lattice: pynini.Fst, lm: pynini.Fst,
                   sym: pynini.SymbolTable) -> str:
    """Decode a lattice against a language model.

    Composes ``lattice`` with ``lm``, takes the shortest path of the result,
    removes epsilons from it and renders it as a string using ``sym``.

    Args:
        lattice: the lattice to decode.
        lm: the FST that ``lattice`` is composed with.
        sym: symbol table passed to ``string`` to render the result.

    Returns:
        The string of the shortest path through the composed lattice.

    Raises:
        AssertionError: if the composition has no start state.
    """
    lattice = pynini.compose(lattice, lm)
    # Raised explicitly (rather than via ``assert``) so that the check is
    # not stripped when Python runs with ``-O``.
    if lattice.start() == pynini.NO_STATE_ID:
        raise AssertionError("composition failure")
    # Pynini can join the string for us.
    return pynini.shortestpath(lattice).rmepsilon().string(sym)
Esempio n. 5
0
 def singularizeFcn(stringin):
     """Return the shortest path of ``stringin`` composed with ``singularize``.

     The input is stripped before composition. If anything raises, the error
     is printed and ``stringin`` is returned unchanged.
     """
     try:
         composed = pynini.compose(stringin.strip(), singularize)
         return pynini.shortestpath(composed).stringify()
     except Exception as e:
         print("Error from singularizeFcn:", e)
     # Only reached on failure: fall back to the untouched input.
     return stringin
Esempio n. 6
0
 def rewrite(self, i: str) -> str:
     """Rewrite ``i`` with ``self.fst`` and return the best output string.

     The input is compiled to an acceptor using ``self.input_token_type`` and
     composed with ``self.fst``. If the composition has no start state, the
     failure is logged and the literal ``"<composition failure>"`` is
     returned. Otherwise the lattice is projected with ``project(True)``,
     epsilons are removed, and the shortest path is rendered with
     ``self.output_token_type``.
     """
     source = pynini.acceptor(i, token_type=self.input_token_type)
     lattice = source @ self.fst
     if lattice.start() != pynini.NO_STATE_ID:
         lattice.project(True).rmepsilon()
         best = pynini.shortestpath(lattice)
         return best.string(self.output_token_type)
     logging.error("Composition failure: %s", i)
     return "<composition failure>"
 def get_best_expansion(self, expansions):
     """Intersect ``expansions`` with ``self.LM`` and return the best path.

     Progress messages are printed before the intersection and before it is
     optimized. The returned object is the optimized single shortest path.
     """
     print("combining expansions and LM ...")
     scored = pn.intersect(expansions, self.LM)
     print("optimizing intersection ...")
     scored.optimize()
     best = pn.shortestpath(scored, nshortest=1)
     return best.optimize()
Esempio n. 8
0
def select_verbalizer(lattice: pynini.FstLike) -> str:
    """
    Pick the single best path out of a verbalization lattice.

    Args:
        lattice: verbalization lattice

    Returns: the string of the shortest (unique, n=1) path
    """
    best_path = pynini.shortestpath(lattice, nshortest=1, unique=True)
    return best_path.string()
Esempio n. 9
0
    def _create_classified_fst(self, text):
        """Compile ``text`` to an FST and return its best path through the grammar.

        A fresh ``FST_Compiler`` (built from ``self.utf8_symbols``) compiles
        the text; the result is composed with ``self.thrax_grammar``. The
        optimized shortest path, with epsilons removed, is returned.
        """
        text_compiler = FST_Compiler(self.utf8_symbols, None)
        text_fst = text_compiler.fst_stringcompile(text)
        classified = pn.compose(text_fst, self.thrax_grammar)
        best = pn.shortestpath(classified).optimize()
        best.rmepsilon()

        return best
Esempio n. 10
0
def best_score(match):
    """Return the weight of the shortest path of ``match`` as a float.

    Args:
        match: an FST.

    Returns:
        ``float`` of the first weight yielded for the shortest path, or
        ``float("inf")`` when no weight is yielded at all.
    """
    nothing = object()
    weights = py.shortestpath(match).paths().weights()
    first = next(iter(weights), nothing)
    if first is nothing:
        return float("inf")
    return float(first)
Esempio n. 11
0
    def select_tag(self, lattice: 'pynini.FstLike') -> str:
        """
        Pick the single best path out of a tagged lattice.

        Args:
            lattice: tagged lattice

        Returns: the string of the shortest (unique, n=1) path
        """
        best_path = pynini.shortestpath(lattice, nshortest=1, unique=True)
        return best_path.string()
Esempio n. 12
0
    def select_verbalizer(self, lattice: 'pynini.FstLike') -> str:
        """
        Pick the single best path out of a verbalization lattice.

        Args:
            lattice: verbalization lattice

        Returns: the string of the shortest (unique, n=1) path
        """
        best_path = pynini.shortestpath(lattice, nshortest=1, unique=True)
        # A second pass that composed the result with
        # self.verbalizer.punct_graph and re-ran shortestpath is disabled.
        return best_path.string()
Esempio n. 13
0
def Tester(stream, far_reader):
  """Runs rule test cases from a stream and reports mismatches on stderr.

  Each line is expected to hold three tab-separated fields: a comma-separated
  list of rule names, the input, and the expected output. Lines with a
  different number of fields are skipped with a message. The rules are looked
  up in `far_reader` and applied to the input in order with `*`; a rule that
  cannot be found makes the whole run count as failed. The first string of
  the shortest path is compared against the expected output.

  Args:
    stream:     iterable of test lines
    far_reader: mapping from rule name to rule FST

  Returns: None (a summary line is written to stderr)
  """
  all_passed = True
  for lineno, line in enumerate(stream):
    fields = line.strip('\n').split('\t')
    if len(fields) != 3:
      sys.stderr.write('Skipping line %d (wrong number of fields)\n' % lineno)
      continue
    rules, input_, output = fields
    input_fst = input_
    for rule in rules.split(','):
      try:
        rule_fst = far_reader[rule]
      except KeyError:
        sys.stderr.write('Warning: cannot find rule %s, line %d\n'
                         % (rule, lineno))
        break
      input_fst = input_fst * rule_fst
    else:
      # Every rule was found: evaluate the rewrite and compare.
      ofst = pynini.shortestpath(pynini.project(input_fst, True))
      paths = pynini.StringPaths(ofst)
      ## Accepts the first string
      pred = paths.istring() if not paths.done() else ''
      if pred != output:
        all_passed = False
        sys.stderr.write('Line %d: input and output do not match for\n'
                         '   Rules:\t%s\n'
                         '   Input:\t%s\nExpected:\t%s\n  Actual:\t%s\n' %
                         (lineno, rules, input_, output, pred))
      continue
    # Only reached via ``break``: a rule was missing on this line.
    all_passed = False
  if all_passed:
    sys.stderr.write('All tests pass!!\n')
  else:
    sys.stderr.write('Some rewrites failed\n')
Esempio n. 14
0
def lattice_to_shortest(lattice, nshortest):
    """Extracts up to `nshortest` unique shortest paths from a lattice.

    The lattice is expected to be an epsilon-free acceptor of output strings
    (such as produced by rewrite_lattice). This is valid only in a path
    semiring.

    Args:
      lattice: Epsilon-free finite acceptor.
      nshortest: Maximum number of shortest paths desired.

    Returns:
      A lattice of the n-shortest unique paths.
    """
    shortest = pynini.shortestpath(lattice, nshortest=nshortest, unique=True)
    return shortest
Esempio n. 15
0
def lattice_to_top_string(lattice, token_type="byte"):
    """Extracts the single top string from a lattice.

    The lattice is expected to be an epsilon-free acceptor of output strings
    (such as produced by rewrite_lattice). This is valid only in a path
    semiring.

    Args:
      lattice: Epsilon-free finite acceptor.
      token_type: Output token type, or symbol table.

    Returns:
      The top string.
    """
    top_path = pynini.shortestpath(lattice)
    return top_path.stringify(token_type)
Esempio n. 16
0
    def _language_model_scoring(self, verbal_arr):
        """Score ``verbal_arr`` against ``self.lm`` and return the best path.

        ``self.compiler.fst_stringcompile_words`` turns ``verbal_arr`` into a
        word FST and a replacement dictionary (stored on
        ``self.replacement_dict``). The word FST gets ``self.word_symbols``
        as output symbols, is optimized, projected with ``project(True)`` and
        arc-sorted, then intersected with ``self.lm``. The optimized shortest
        path of the optimized intersection is returned.
        """
        compiled = self.compiler.fst_stringcompile_words(verbal_arr)
        words, self.replacement_dict = compiled
        words.set_output_symbols(self.word_symbols)
        words.optimize()
        words.project(True)
        words.arcsort()
        scored = pn.intersect(words, self.lm)
        scored.optimize()
        return pn.shortestpath(scored).optimize()
Esempio n. 17
0
    def closest_match(self, query: pynini.FstLike) -> str:
        """Finds the lexicon string closest to the query.

        The query (a string or an acceptor) is turned into a lattice by
        `_create_levenshtein_automaton_lattice`, and the shortest path
        through that lattice is returned as a string. When several strings
        tie for closest, only one of them is returned; which one should be
        considered unspecified. Use `closest_matches` to enumerate all ties.

        Args:
          query: input string or acceptor.

        Returns:
          The closest string in the lexicon.
        """
        candidates = self._create_levenshtein_automaton_lattice(query)
        best_path = pynini.shortestpath(candidates)
        return best_path.string()
Esempio n. 18
0
    def singularizeFcn(string):
        """Singularize ``string``, printing every intermediate step.

        Debugging variant: the stripped input is wrapped with
        ``pynini.union``, composed with ``singularize``, and the shortest
        path is stringified. Each intermediate value is printed on the way.

        The print calls take a single pre-formatted argument so that they
        produce the same output under Python 2 (where ``print`` is a
        statement) and Python 3.

        Args:
            string: the text to singularize; it is stripped first.

        Returns:
            The stringified shortest path.
        """
        print("singularize has type= %s" % (type(singularize),))
        print("string= %s" % (string,))
        s1 = string.strip()
        print("s1= %s" % (s1,))
        s2 = pynini.union(s1)
        print("s2= %s" % (s2,))
        a = pynini.compose(s2, singularize)
        print("a has type= %s a=" % (type(a),))
        print(a)

        b = pynini.shortestpath(a)
        print("b=")
        print(b)
        ans = b.stringify()
        return ans
Esempio n. 19
0
  def closest_match(self, query):
    """Finds the lexicon string closest to the query.

    The query (a string or an acceptor) is turned into a lattice by
    `_create_levenshtein_automaton_lattice`, and the shortest path through
    that lattice is returned as a string. When several strings tie for
    closest, only one of them is returned; which one should be considered
    unspecified. Use `closest_matches` to enumerate all ties.

    Args:
      query: input string or acceptor.

    Returns:
      The closest string in the lexicon.
    """
    candidates = self._create_levenshtein_automaton_lattice(query)
    best_path = shortestpath(candidates)
    # For implementation reasons, the shortest path (when k = 1) is in reverse
    # state order, so it is topologically sorted before being stringified.
    return best_path.topsort().stringify()
Esempio n. 20
0
    def singularizeFcn(stringin):
        """Singularize ``stringin`` and return the result as a string.

        The stripped input is wrapped with ``pynini.union``, composed with
        ``singularize``, and the shortest path is stringified. Flip ``dbg``
        to ``True`` to print every intermediate value.

        The debug prints take a single pre-formatted argument so that the
        function parses under Python 3 and prints the same text under
        Python 2 (where ``print`` is a statement).

        Args:
            stringin: the text to singularize; it is stripped first.

        Returns:
            The stringified shortest path.
        """
        dbg = False
        if dbg:
            print("string= %s" % (stringin,))
        s1 = stringin.strip()
        if dbg:
            print("s1= %s" % (s1,))
        s2 = pynini.union(s1)
        if dbg:
            print("s2= %s" % (s2,))
        a = pynini.compose(s2, singularize)
        if dbg:
            print("a has type= %s a=" % (type(a),))
            print(a)

        b = pynini.shortestpath(a)
        if dbg:
            print("b=")
            print(b)
        ans = b.stringify()
        return ans
Esempio n. 21
0
 def pathIterator(self, limit=None, side=None):
     """Yield decoded paths of ``self.fsm``.

     Args:
         limit: when given, only the ``limit`` shortest paths are iterated;
             when ``None``, all paths of ``self.fsm`` are requested.
         side: ``"top"`` yields only the first element of each path,
             ``"bottom"`` only the second; any other value yields a pair of
             both. Every element is passed through ``pynini_decode``.

     Raises:
         pywrapfst.FstArgError: re-raised (after printing a hint) when the
             paths of the whole machine cannot be requested.
     """
     token_types = dict(input_token_type='symbol',
                        output_token_type='symbol')
     if limit is not None:
         shortest = pynini.shortestpath(self.fsm, nshortest=limit)
         stringpaths = shortest.paths(**token_types)
     else:
         try:
             stringpaths = self.fsm.paths(**token_types)
         except pywrapfst.FstArgError:
             print(
                 "Can't iterate over this mapping. It is cyclic and may accept infinitely many keys."
             )
             raise
     for stringpath in stringpaths:
         if side == "top":
             yield pynini_decode(stringpath[0])
         elif side == "bottom":
             yield pynini_decode(stringpath[1])
         else:
             yield (pynini_decode(stringpath[0]),
                    pynini_decode(stringpath[1]))
Esempio n. 22
0
        def ApplyOnText(self, text: str) -> str:
            """Transduce the given string using the FST.

            The escaped input string is composed with ``self._fst`` and the
            shortest path of the result is returned as a string.

            Args:
              text: Input string to be transduced.

            Returns:
              Transduced string output.

            Raises:
              ValueError: when Pynini raises ``FstOpError`` during the
                operation; the original error is attached as the cause.
            """
            try:
                # Square brackets and backslash carry special meaning in Pynini.
                # So they need to be escaped for unmanaged strings.
                return pynini.shortestpath(
                    pynini.escape(text) @ self._fst).string()
            except pynini.FstOpError as error:
                # Chain explicitly so the traceback presents the Pynini error
                # as the direct cause of the ValueError.
                raise ValueError(
                    f'{error} on the string (between quotes): `{text}`'
                ) from error
Esempio n. 23
0
def runForward(t, x, k=1):
    """Run input ``x`` through transducer ``t`` and return the best output.

    Args:
        t: the transducer.
        x: the input, composed on the left of ``t``.
        k: value passed as ``nshortest`` to ``shortestpath`` (default 1).

    Returns:
        The stringified shortest path of ``compose(x, t)``, or ``None`` if
        any step raised.
    """
    try:
        composed = sandwich.compose(x, t)
        return sandwich.shortestpath(composed, nshortest=k).stringify()
    except Exception:
        # Best-effort API: failures yield None. ``Exception`` (not a bare
        # ``except``) lets KeyboardInterrupt/SystemExit propagate.
        return None
Esempio n. 24
0
def sg(x):
    """Return the shortest path of stripped ``x`` composed with ``singularize``."""
    composed = pynini.compose(x.strip(), singularize)
    return pynini.shortestpath(composed).stringify()
Esempio n. 25
0
def decode(message):
    """Decode ``message`` and return the most likely path as a string.

    The message is combined with ``t9`` via ``*`` and projected with
    ``project(True)``; the result is composed with ``lm_char`` (the
    character n-gram model, per the original author's note) and then with
    ``lm_word``. The shortest path of that lattice is stringified.
    """
    encoded = (message * t9).project(True)
    lattice = (encoded @ lm_char) @ lm_word
    best_path = pynini.shortestpath(lattice)
    return best_path.stringify()
Esempio n. 26
0
 def singularize(self, string, rules):
     """Apply every rule in ``rules`` to ``string`` and return sorted results.

     Args:
         string: the text to rewrite; it is stripped before each composition.
         rules: mapping from a rule key to the rule that the stripped string
             is composed with.

     Returns:
         A sorted list of ``(key, output)`` tuples, where ``output`` is the
         stringified shortest path for that rule.
     """
     return sorted(
         (key,
          pynini.shortestpath(
              pynini.compose(string.strip(), rules[key])).stringify())
         for key in rules
     )
Esempio n. 27
0
 def select_all_semiotic_tags(self, lattice: 'pynini.FstLike', n=100) -> List[str]:
     """Return element 1 of each item among the ``n`` shortest paths of ``lattice``.

     The paths are read with ``paths("utf8").items()``.
     """
     shortest = pynini.shortestpath(lattice, nshortest=n)
     path_items = shortest.paths("utf8").items()
     return [item[1] for item in path_items]
Esempio n. 28
0
def lattice_shortest_path(lattice_fst):
    """Return the shortest path of a hypothesis lattice as a string.

    The shortest path is the one with the lowest weight in the lattice of
    hypotheses represented by ``lattice_fst``.
    """
    best_path = pynini.shortestpath(lattice_fst)
    return best_path.stringify()
Esempio n. 29
0
def _singularize(string):
    """Return the shortest path of stripped ``string`` composed with ``singularize``."""
    composed = pynini.compose(string.strip(), singularize)
    return pynini.shortestpath(composed).stringify()
Esempio n. 30
0
 def get_all_verbalizers(self, lattice: 'pynini.FstLike', n=100) -> List[str]:
     """Return element 1 of each item among the ``n`` shortest paths of ``lattice``.

     The paths are read with ``paths("utf8").items()``.
     """
     shortest = pynini.shortestpath(lattice, nshortest=n)
     path_items = shortest.paths("utf8").items()
     return [item[1] for item in path_items]