Beispiel #1
0
 def get_entity_alignments(self, x, y):
     """Align lexicon entities found in x with their occurrence in y.

     Both strings are tokenized by splitting on single spaces. An entity
     is aligned only if it is produced exactly once by the lexicon for x
     and appears exactly once as a whole token of y.

     Args:
         x: Space-separated input utterance.
         y: Space-separated logical form.

     Returns:
         A list of (cat, x_span, y_span) tuples. cat is '$' followed by
         the text after the first ':' of the entity with its first
         character dropped (presumably entities look like 'name:_type';
         confirm against the lexicon). x_span and y_span are half-open
         (start, end) character offsets into x and y respectively.
     """
     # Cache lexicon
     if not self.lex:
         self.lex = atislexicon.get_lexicon()

     x_toks = x.split(' ')
     # x_starts[i] is the character offset at which token i begins; the
     # extra final entry is one past the (virtual) trailing separator.
     x_starts = [0]
     for tok in x_toks:
         x_starts.append(x_starts[-1] + len(tok) + 1)

     y_toks = y.split(' ')
     y_starts = [0]
     for tok in y_toks:
         y_starts.append(y_starts[-1] + len(tok) + 1)

     lex_items = self.lex.map_over_sentence(x_toks, return_entries=True)
     lex_ents = [item[1] for item in lex_items]

     alignments = []
     for (i, j), ent in lex_items:
         # Make sure this entity occurs exactly once in lexicon entries
         # and in the logical form
         if lex_ents.count(ent) != 1:
             continue
         if y_toks.count(ent) != 1:
             continue

         ent_type = ent.split(':')[1][1:]
         # Subtract 1 to drop the separator that precedes token j.
         x_span = (x_starts[i], x_starts[j] - 1)
         # Locate the entity by token position rather than by substring
         # search on y: the entity text may also occur inside an earlier,
         # longer token, which would yield a wrong offset.
         y_ind = y_starts[y_toks.index(ent)]
         y_span = (y_ind, y_ind + len(ent))
         alignments.append(('$' + ent_type, x_span, y_span))
     return alignments
Beispiel #2
0
def get_lexicon():
  """Return the lexicon for the configured domain.

  Returns None when OPTIONS.use_lexicon is falsy. Otherwise dispatches on
  OPTIONS.domain ('atis' or 'geoquery').

  Raises:
    Exception: if a lexicon is requested for any other domain.
  """
  if not OPTIONS.use_lexicon:
    return None
  domain = OPTIONS.domain
  if domain == 'atis':
    return atislexicon.get_lexicon()
  if domain == 'geoquery':
    return geolexicon.get_lexicon()
  raise Exception('No lexicon for domain %s' % domain)
Beispiel #3
0
def get_templates_and_replacements(data):
    """Extract entity-abstracted templates and replacement rules from data.

    Args:
        data: Iterable of (x, y) pairs of space-separated strings.

    Returns:
        A (templates, replacements) pair. templates is a list of
        (x_template, y_template, reptypes) tuples in which the k-th
        abstracted entity is replaced by the placeholder '%(wk)s' in both
        strings and reptypes[k] is that entity's type. replacements maps
        each entity type (the text after the first ':' of the entity) to
        a set of (x_phrase, entity) pairs. Examples with no abstractable
        entity contribute no template.
    """
    lexicon = atislexicon.get_lexicon()
    templates = []
    replacements = collections.defaultdict(set)

    for utterance, logical_form in data:
        src_toks = utterance.split(' ')
        tgt_toks = logical_form.split(' ')
        matches = lexicon.map_over_sentence(src_toks, return_entries=True)
        matched_ents = [match[1] for match in matches]

        # Each hole is (start, end, index into tgt_toks).
        holes = []
        reptypes = []
        for (start, end), ent in matches:
            # Only abstract entities that are unambiguous: exactly one
            # lexicon match and exactly one token in the logical form.
            if matched_ents.count(ent) != 1:
                continue
            if tgt_toks.count(ent) != 1:
                continue

            phrase = ' '.join(src_toks[start:end])
            ent_type = ent.split(':')[1]
            replacements[ent_type].add((phrase, ent))

            holes.append((start, end, tgt_toks.index(ent)))
            reptypes.append(ent_type)

        if not holes:
            continue

        src_out = list(src_toks)
        tgt_out = list(tgt_toks)
        for count, (start, end, tgt_ind) in enumerate(holes):
            placeholder = '%%(w%d)s' % count
            # The first token of the span carries the placeholder; the
            # remaining tokens of the span are blanked out.
            src_out[start] = placeholder
            for k in range(start + 1, end):
                src_out[k] = None
            tgt_out[tgt_ind] = placeholder

        x_t = ' '.join([tok for tok in src_out if tok is not None])
        y_t = ' '.join(tgt_out)
        templates.append((x_t, y_t, reptypes))

    return templates, replacements
def get_templates_and_replacements(data):
  """Build templates with entity placeholders, plus per-type replacements.

  For every (x, y) pair in data, both strings are split on single spaces
  and the lexicon is run over the tokens of x. Each entity that the
  lexicon yields exactly once, and that is exactly one token of y, is
  swapped for a '%(wN)s' placeholder in both strings (N counts such
  entities within the pair, starting at 0).

  Returns:
    (templates, replacements): templates is a list of
    (x_template, y_template, reptypes); replacements is a defaultdict
    mapping entity type (text after the first ':' of the entity) to a
    set of (phrase_in_x, entity) tuples.
  """
  lex = atislexicon.get_lexicon()
  templates = []
  replacements = collections.defaultdict(set)

  for sentence, form in data:
    words = sentence.split(' ')
    form_toks = form.split(' ')
    entries = lex.map_over_sentence(words, return_entries=True)
    entities = [entry[1] for entry in entries]

    # Each slot pairs a (start, end) span in words with an index in form_toks.
    slots = []
    reptypes = []
    for (start, end), ent in entries:
      # Keep only entities seen once by the lexicon and once in the form.
      if entities.count(ent) == 1 and form_toks.count(ent) == 1:
        phrase = ' '.join(words[start:end])
        ent_type = ent.split(':')[1]
        replacements[ent_type].add((phrase, ent))
        slots.append(((start, end), form_toks.index(ent)))
        reptypes.append(ent_type)

    if slots:
      x_out = list(words)
      y_out = list(form_toks)
      for n, ((start, end), form_pos) in enumerate(slots):
        placeholder = '%(w{0})s'.format(n)
        x_out[start] = placeholder
        # Blank the rest of a multi-word span so it collapses to one hole.
        for k in range(start + 1, end):
          x_out[k] = None
        y_out[form_pos] = placeholder
      kept = [tok for tok in x_out if tok is not None]
      templates.append((' '.join(kept), ' '.join(y_out), reptypes))

  return templates, replacements
 def get_lexicon(self):
   """Return the lexicon provided by atislexicon.get_lexicon()."""
   lexicon = atislexicon.get_lexicon()
   return lexicon
Beispiel #6
0
def get_lexicon():
  """Return the lexicon for the configured domain.

  Returns None when OPTIONS.lexicon is falsy. Only the 'atis' domain is
  supported.

  Raises:
    Exception: if a lexicon is requested for any other domain.
  """
  if not OPTIONS.lexicon:
    return None
  domain = OPTIONS.domain
  if domain != 'atis':
    raise Exception('No lexicon for domain %s' % domain)
  return atislexicon.get_lexicon()
Beispiel #7
0
 def get_lexicon(self):
     """Return the lexicon provided by atislexicon.get_lexicon()."""
     lexicon = atislexicon.get_lexicon()
     return lexicon
Beispiel #8
0
def get_lexicon():
    """Return the ATIS lexicon if lexicons are enabled, else None.

    Raises:
        Exception: if OPTIONS.lexicon is truthy but OPTIONS.domain is not
            'atis'.
    """
    if not OPTIONS.lexicon:
        return None
    domain = OPTIONS.domain
    if domain == 'atis':
        return atislexicon.get_lexicon()
    raise Exception('No lexicon for domain %s' % domain)