Esempio n. 1
0
 def test(self):
     """Realize a small hand-written list of canonical entities and print the result."""
     samples = (
         ("amanda", "name"),
         ("rowan eastern college of the arts", "school"),
         ("bible studies", "major"),
     )
     canonical = [CanonicalEntity(value, kind) for value, kind in samples]
     print(self.realize_entity(canonical))
Esempio n. 2
0
 def from_csv(cls, path, threshold):
     """Build an instance from the ``title`` column of a comma-separated file.

     Each row of the CSV at `path` yields one ``CanonicalEntity`` of type
     ``'title'``; `threshold` is forwarded to the constructor unchanged.
     """
     with open(path, 'r') as csvfile:
         entities = [
             CanonicalEntity(value=record['title'], type='title')
             for record in csv.DictReader(csvfile, delimiter=',')
         ]
     return cls(entities=entities, threshold=threshold)
Esempio n. 3
0
    def detect_slots(self, tokens, kb, context=None, join=False, stem=False):
        '''
        Replace the slot words in `tokens` with entities of type 'slot'.

        context: context tokens; computed from `kb` when empty or None
        join: join consecutive slot words to one entity
        stem: forwarded to the context and labelling helpers
        '''
        if not context:
            context = self.get_context_tokens((kb, ), stem=stem)

        labels = self._get_slot_labels(kb.facts['personal']['Role'],
                                       kb.facts['item']['Category'],
                                       context,
                                       tokens,
                                       stem=stem)

        def as_slot(surface):
            # Slot entities are created with an empty canonical value.
            return Entity(surface=surface,
                          canonical=CanonicalEntity(value='', type='slot'))

        if join:
            merged = []
            for span in self.label_to_span(labels):
                if isinstance(span, tuple):
                    # A tuple gives the slice bounds of a run of slot words.
                    merged.append(as_slot(' '.join(tokens[span[0]:span[1]])))
                else:
                    merged.append(tokens[span])
            return merged

        # One output item per input token; a label of 1 marks a slot word.
        return [
            as_slot(word) if labels[pos] == 1 else word
            for pos, word in enumerate(tokens)
        ]
Esempio n. 4
0
    def _tokens_to_event(self, tokens, output_data, semi_event=False):
        """Turn generated tokens into a dialogue event.

        Args:
            tokens: Tuple or list. ``tokens[0]`` is a vocabulary index (int)
                or an already decoded word/marker; ``tokens[1]``, when
                present, is a float price, a price entity or None.
            output_data: Stored under the ``"output_data"`` key of the
                metadata attached to the returned event.
            semi_event: If True, only convert the price in ``tokens[1]`` with
                ``self.builder.get_price_number`` and return the token list.

        Returns:
            The token list when `semi_event` is True; otherwise the result of
            ``self.offer``, ``self.accept``, ``self.reject``, ``self.quit``
            or ``self.message``.

        Raises:
            AssertionError: If the first token is the OFFER marker but it is
                not followed by an entity.
        """
        if isinstance(tokens, tuple):
            tokens = list(tokens)

        # Only touch tokens[1] when it exists: the checks further down already
        # allow for shorter input, so blind indexing would raise IndexError.
        has_second = len(tokens) > 1

        if has_second and isinstance(tokens[1], float):
            tokens[1] = CanonicalEntity(type='price', value=tokens[1])

        if semi_event:
            # From scale to real price
            if has_second and tokens[1] is not None:
                tokens[1] = self.builder.get_price_number(tokens[1], self.kb)
            return tokens

        if isinstance(tokens[0], int):
            tokens[0] = self.env.vocab.to_word(tokens[0])

        if has_second and tokens[0] == markers.OFFER and is_entity(tokens[1]):
            try:
                price = self.builder.get_price_number(tokens[1], self.kb)
                return self.offer({'price': price}, metadata={"output_data": output_data})
            except ValueError:
                # The offer could not be built: fall through to the generic
                # handling below instead of failing.
                pass
        elif tokens[0] == markers.OFFER:
            # Explicit raise so the check is not stripped by `python -O`.
            raise AssertionError('OFFER marker is not followed by a price entity')

        tokens = self.builder.entity_to_str(tokens, self.kb)

        if len(tokens) > 0:
            if tokens[0] == markers.ACCEPT:
                return self.accept(metadata={"output_data": output_data})
            elif tokens[0] == markers.REJECT:
                return self.reject(metadata={"output_data": output_data})
            elif tokens[0] == markers.QUIT:
                return self.quit(metadata={"output_data": output_data})

        # Drop trailing None placeholders before joining the words.
        while len(tokens) > 0 and tokens[-1] is None:
            tokens = tokens[:-1]
        s = self.attach_punct(' '.join(tokens))

        role = self.kb.facts['personal']['Role']
        category = self.kb.facts['item']['Category']
        real_uttr = self.uttr_gen(tokens, role, category)

        return self.message(s, metadata={"output_data": output_data, "real_uttr": real_uttr})
Esempio n. 5
0
 def _init_prices(self):
     """Store the price list from ``PriceList`` and pass one price entity per value to ``add_words``."""
     from core.price_tracker import PriceList
     self.p_list = PriceList.getPriceList().p_list
     from cocoa.core.entity import Entity, CanonicalEntity
     price_entities = []
     for price in self.p_list:
         canonical = CanonicalEntity(value=price, type='price')
         # Every price entity is created with an empty surface string.
         price_entities.append(Entity(surface='', canonical=canonical))
     self.add_words(price_entities)
Esempio n. 6
0
 def int_to_text(self, inds, stage=None, prices=None):
     '''
     Inverse of text_to_int.

     Maps every index in `inds` to its word through the vocabulary. When
     `prices` is given (same length as `inds`), each word for which
     `price_filler` is true is replaced by a price entity holding the
     matching price. `stage` is accepted but not used here.
     '''
     words = []
     for ind in inds:
         words.append(self.vocab.to_word(ind))
     if prices is None:
         return words
     assert len(inds) == len(prices)
     return [
         CanonicalEntity(value=price, type='price') if price_filler(word) else word
         for word, price in izip(words, prices)
     ]
    def link_entity(self, raw_tokens, kb=None, scale=True, price_clip=None):
        """
        Replace the tokens of a tokenized sentence that look like prices with price entities.

        Args:
            raw_tokens: List of dialogue tokens
            kb: agent knowledge base
            scale(bool): If True, the entity stores the price returned by
                PriceScaler._scale_price; otherwise it stores the parsed number
            price_clip(int, optional): If specified (and kb is given), numbers whose scaled
                price is greater than price_clip in absolute value are left as plain tokens

        Returns:
            A new list of tokens, with the number tokens replaced with entities

        """
        def marked_with_dollar(word):
            return word[0] == '$' or word[-1] == '$'

        # The sentinels give the first and last real token a neighbour for the context check.
        padded = ['<s>'] + raw_tokens + ['</s>']
        linked = []
        if kb:
            kb_numbers = self.get_kb_numbers(kb)
            list_price = kb.facts['item']['Price']
        for pos, token in enumerate(raw_tokens, 1):
            try:
                number = float(self.process_string(token))
                dollar = marked_with_dollar(token)
                if not (dollar or self.is_price(padded[pos - 1], padded[pos + 1])):
                    # Neither a dollar sign nor a price-like context
                    number = None
                elif math.isinf(number):
                    # Avoid 'infinity' being recognized as a number
                    number = None
                elif kb:
                    # Check if the price is reasonable
                    if not dollar:
                        if number > 1.5 * list_price:
                            number = None
                        # Probably a spec number
                        if number != list_price and number in kb_numbers:
                            number = None
                    if number is not None and price_clip is not None:
                        if abs(PriceScaler._scale_price(kb, number)) > price_clip:
                            number = None
            except ValueError:
                # Not parseable as a number: keep the token as it is
                number = None

            if number is None:
                linked.append(token)
                continue
            assert not math.isnan(number)
            value = PriceScaler._scale_price(kb, number) if scale else number
            linked.append(
                Entity(surface=token, canonical=CanonicalEntity(value=value, type='price')))
        return linked
Esempio n. 8
0
 def get_entity_coords(self):
     """Return a dict of {entity: [row]}.

     Every cell of the KB contributes its row index to the list of the
     entity built from the lower-cased cell value and the attribute type.
     """
     coords = defaultdict(list)
     for row, item in enumerate(self.kb.items):
         for attr in self.kb.attributes:
             cell_value = item[attr.name].lower()
             coords[CanonicalEntity(value=cell_value, type=attr.value_type)].append(row)
     return coords
Esempio n. 9
0
 def count_entity(self):
     '''
     Return a dict of {entity: count}.

     Each attribute value of each KB item is lower-cased and counted as an
     entity whose type is looked up in `self.attr_type`.
     '''
     counts = defaultdict(int)
     for item in self.kb.items:
         for attr_name, raw_value in item.iteritems():
             key = CanonicalEntity(raw_value.lower(), self.attr_type[attr_name])
             counts[key] = counts[key] + 1
     return counts
Esempio n. 10
0
    def from_file(cls, inverse_lexicon_path):
        """Read linked entities from file.

        Every line of the inverse lexicon holds three tab-separated fields
            <entity>  <span>  <type>
        The result maps each (entity, type) pair to a counter of how often
        each surface span occurs for it, and is handed to the constructor.

        """
        variant_counts = defaultdict(Counter)
        with open(inverse_lexicon_path, "r") as lexicon_file:
            for raw_line in lexicon_file:
                fields = raw_line.strip().split("\t")
                value, surface, type_ = fields
                variant_counts[CanonicalEntity(value, type_)][surface] += 1
        return cls(variant_counts)
Esempio n. 11
0
    def __init__(self, raw_text, tokens, action='message'):
        """Store the raw text, tokens and action and collect the prices they contain.

        For a 'message' the prices are the entity tokens; for an 'offer' the
        raw text itself is parsed as a float and wrapped in a price entity.
        Any other action leaves the price list empty.
        """
        self.text = raw_text
        self.tokens = tokens
        self.action = action
        self.prices = []
        self.keywords = []
        self.speech_acts = []
        self.stage = -1
        # Two-level counter: missing keys default to 0 at the inner level.
        self.categories = defaultdict(lambda: defaultdict(int))

        if self.action == 'offer':
            offered = self.text
            offer_entity = Entity(offered, CanonicalEntity(float(offered), 'price'))
            self.prices.append(offer_entity)
        elif self.action == 'message':
            self.prices = [tok for tok in self.tokens if is_entity(tok)]
Esempio n. 12
0
 def rewrite_candidate(self, fillers, candidate):
     """Return copies of `candidate` with one slot entity replaced by a filler.

     For every slot entity in `candidate` and every string in `fillers`, one
     new token list is produced in which that slot is replaced by one slot
     entity per whitespace-separated word of the filler. An empty or None
     candidate gives an empty list.
     """
     if not candidate:
         return []
     variants = []
     for pos, tok in enumerate(candidate):
         if not (is_entity(tok) and tok.canonical.type == 'slot'):
             continue
         for filler in fillers:
             expanded = list(candidate)
             # Swap the single slot token for the filler's words in place.
             expanded[pos:pos + 1] = [
                 Entity(word, CanonicalEntity('', 'slot'))
                 for word in filler.split()
             ]
             variants.append(expanded)
     return variants
Esempio n. 13
0
 def link_entity(self, raw_tokens, kb=None, scale=True, price_clip=None):
     """Return a copy of `raw_tokens` with price-like tokens replaced by price entities.

     A token becomes an entity when it parses as a number and either carries
     a dollar sign or sits in a price context according to `self.is_price`.
     With a `kb`, numbers without a dollar sign are rejected when they exceed
     1.5 times the list price or match another number of the KB, and with
     `price_clip` also when the scaled price exceeds it in absolute value.
     If `scale` is true the entity stores the scaled price, else the number.
     """
     def marked_with_dollar(word):
         return word[0] == '$' or word[-1] == '$'

     # The sentinels give the first and last real token a neighbour for the context check.
     padded = ['<s>'] + raw_tokens + ['</s>']
     linked = []
     if kb:
         kb_numbers = self.get_kb_numbers(kb)
         list_price = kb.facts['item']['Price']
     for pos, token in enumerate(raw_tokens, 1):
         try:
             number = float(self.process_string(token))
             dollar = marked_with_dollar(token)
             if not (dollar or self.is_price(padded[pos - 1], padded[pos + 1])):
                 # Neither a dollar sign nor a price-like context
                 number = None
             elif math.isinf(number):
                 # Avoid 'infinity' being recognized as a number
                 number = None
             elif kb:
                 # Check if the price is reasonable
                 if not dollar:
                     if number > 1.5 * list_price:
                         number = None
                     # Probably a spec number
                     if number != list_price and number in kb_numbers:
                         number = None
                 if number is not None and price_clip is not None:
                     if abs(PriceScaler._scale_price(kb, number)) > price_clip:
                         number = None
         except ValueError:
             # Not parseable as a number: keep the token as it is
             number = None

         if number is None:
             linked.append(token)
             continue
         assert not math.isnan(number)
         value = PriceScaler._scale_price(kb, number) if scale else number
         canonical = CanonicalEntity(value=value, type='price')
         linked.append(Entity(surface=token, canonical=canonical))
     return linked
Esempio n. 14
0
    def _tokens_to_event(self, tokens, output_data):
        """Turn generated tokens into a dialogue event.

        Args:
            tokens: Tuple or list. ``tokens[0]`` is a vocabulary index (int)
                or an already decoded word/marker; ``tokens[1]``, when
                present, is a float price or a price entity.
            output_data: Passed as the ``metadata`` of the returned event.

        Returns:
            The result of ``self.offer``, ``self.accept``, ``self.reject``,
            ``self.quit`` or ``self.message``.

        Raises:
            AssertionError: If the first token is the OFFER marker but it is
                not followed by an entity.
        """
        if isinstance(tokens, tuple):
            tokens = list(tokens)
        if isinstance(tokens[0], int):
            tokens[0] = self.env.vocab.to_word(tokens[0])

        # Only touch tokens[1] when it exists: the checks further down already
        # allow for shorter input, so blind indexing would raise IndexError.
        has_second = len(tokens) > 1

        if has_second and isinstance(tokens[1], float):
            tokens[1] = CanonicalEntity(type='price', value=tokens[1])

        if has_second and tokens[0] == markers.OFFER and is_entity(tokens[1]):
            try:
                price = self.builder.get_price_number(tokens[1], self.kb)
                return self.offer({'price': price}, metadata=output_data)
            except ValueError:
                # The offer could not be built: fall through to the generic
                # handling below instead of failing.
                pass
        elif tokens[0] == markers.OFFER:
            # Explicit raise so the check is not stripped by `python -O`.
            raise AssertionError('OFFER marker is not followed by a price entity')

        tokens = self.builder.entity_to_str(tokens, self.kb)

        if len(tokens) > 0:
            if tokens[0] == markers.ACCEPT:
                return self.accept(metadata=output_data)
            elif tokens[0] == markers.REJECT:
                return self.reject(metadata=output_data)
            elif tokens[0] == markers.QUIT:
                return self.quit(metadata=output_data)

        # Drop trailing None placeholders before joining the words.
        while len(tokens) > 0 and tokens[-1] is None:
            tokens = tokens[:-1]
        s = self.attach_punct(' '.join(tokens))
        return self.message(s, metadata=output_data)
Esempio n. 15
0
 def get_same_row_entities(self, entities):
     """Return entities in the same row as `entities`.
     """
     print 'get same row:', entities
     rows = set(range(len(self.kb.items)))
     for entity in entities:
         rows = rows.intersection(set(self.entity_coords[entity]))
     row_entities = []
     for row in rows:
         ents = []
         if self.item_weights[row] < 0:
             continue
         item = self.kb.items[row]
         for col, attr in enumerate(self.kb.attributes):
             entity = CanonicalEntity(value=item[attr.name].lower(),
                                      type=attr.value_type)
             if not entity in entities:
                 ents.append(entity)
         if ents:
             row_entities.append(ents)
     return row_entities
Esempio n. 16
0
 def load_candidates(self, paths):
     candidates = defaultdict(list)
     # When dumped to json, NamedTuple becomes list. Now convert it back.
     is_str = lambda x: isinstance(x, basestring)
     # x[0] (surface of entity): note that for prices from the offer action,
     # surface is float instead of string
     to_ent = lambda x: x.encode('utf-8') if is_str(x) else \
         Entity(x[0].encode('utf-8') if is_str(x[0]) else x[0], CanonicalEntity(*x[1]))
     for path in paths:
         print 'Load candidates from', path
         results = read_json(path)
         for r in results:
             # None for encoding turns
             if r['candidates'] is None:
                 candidates[(r['uuid'], r['role'])].append(None)
             else:
                 # Only take the response (list of tokens)
                 candidates_ = [[to_ent(x) for x in c['response']]
                                for c in ifilter(lambda x: 'response' in x,
                                                 r['candidates'])]
                 candidates[(r['uuid'], r['role'])].append(candidates_)
     return candidates
Esempio n. 17
0
 def from_json(cls, path, threshold):
     """Build an instance from the ``title`` field of each record in a JSON file.

     The file at `path` must contain a sequence of records; every record
     yields one ``CanonicalEntity`` of type ``'title'``. `threshold` is
     forwarded to the constructor unchanged.
     """
     # Context manager so the file handle is closed even if parsing fails.
     with open(path, 'r') as json_file:
         rows = json.load(json_file)
     entities = [CanonicalEntity(value=row['title'], type='title') for row in rows]
     return cls(entities=entities, threshold=threshold)
Esempio n. 18
0
 def price_to_entity(cls, price):
     """Wrap `price` in an Entity whose surface and canonical value are both `price`."""
     canonical = CanonicalEntity(price, 'price')
     return Entity(price, canonical)
Esempio n. 19
0
 def tuple_to_entity(cls, token):
     """Convert a list-encoded entity back into an Entity; return anything else unchanged.

     A list is read as ``[surface, canonical_fields]`` where the second item
     is unpacked into ``CanonicalEntity``.
     """
     if not isinstance(token, list):
         return token
     return Entity(token[0], CanonicalEntity(*token[1]))
Esempio n. 20
0
 def to_word(self, ind):
     """Map a vocabulary index to its word; wrap any non-int value as a price entity."""
     if not isinstance(ind, int):
         # Imported here, as in the original, rather than at module level.
         from cocoa.core.entity import CanonicalEntity
         return CanonicalEntity(value=ind, type='price')
     return self.ind_to_word[ind]