def to_example(self, row):
    """Build an ``Example`` from one raw row and pass it to ``convert_types``.

    The row is paired positionally with ``self.headers``. The relation label
    comes from the ``annotated_relations`` field (whitespace-stripped) and is
    renamed through ``self.relation_map`` when it has an entry there.
    Backticks in the ``pos`` field become apostrophes, and the two-character
    escapes backslash-n / backslash-t in the dependency fields are expanded
    into real newline / tab characters.

    Raises:
        AssertionError: if ``row`` and ``self.headers`` differ in length.
    """
    headers = self.headers
    assert len(row) == len(headers), \
        "could not convert row to example %s\n%s" % (row, headers)

    ex = Example(**dict(zip(headers, row)))
    ex.relation = ex.annotated_relations.strip()

    # Only the POS column needs the backtick normalisation.
    ex['pos'] = ex['pos'].replace('`', "'")

    relation_map = self.relation_map
    if ex.relation in relation_map:
        ex.relation = relation_map[ex.relation]

    for field in ('dependency', 'dep_extra', 'dep_malt'):
        escaped = ex[field]
        ex[field] = escaped.replace("\\n", "\n").replace("\\t", "\t")

    return self.convert_types(ex)
def to_example(self, row):
    """Convert a raw row into a typed ``Example``.

    Cells are matched to ``self.headers`` by position. After construction:
    ``relation`` is set from the stripped ``annotated_relations`` field and
    translated through ``self.relation_map`` if present there; backticks in
    ``pos`` are replaced by apostrophes; the dependency fields have their
    escaped newline/tab sequences restored. The result is passed through
    ``self.convert_types``.

    Raises:
        AssertionError: if the row length does not match the header count.
    """
    def unescape(text):
        # Restore real whitespace from the two-character escape sequences.
        return text.replace("\\n", "\n").replace("\\t", "\t")

    assert len(self.headers) == len(row), (
        "could not convert row to example %s\n%s" % (row, self.headers))

    fields = {}
    for name, cell in zip(self.headers, row):
        fields[name] = cell
    ex = Example(**fields)

    ex.relation = ex.annotated_relations.strip()
    ex['pos'] = ex['pos'].replace('`', "'")

    label = ex.relation
    if label in self.relation_map:
        ex.relation = self.relation_map[label]

    ex['dependency'] = unescape(ex['dependency'])
    ex['dep_extra'] = unescape(ex['dep_extra'])
    ex['dep_malt'] = unescape(ex['dep_malt'])

    return self.convert_types(ex)
def to_example(self, row):
    """Wrap a raw row in an ``Example`` and run ``convert_types`` on it.

    Each cell is keyed by the header at the same position in
    ``self.headers``.

    Raises:
        AssertionError: if ``row`` and ``self.headers`` differ in length.
    """
    actual, expected = len(row), len(self.headers)
    assert actual == expected, (
        "could not convert row to example %s\n%s" % (row, self.headers))

    record = {header: cell for header, cell in zip(self.headers, row)}
    return self.convert_types(Example(**record))
def convert_types(self, ex):
    """Convert the raw string fields of ``ex`` into typed values.

    Steps, in order:
      * ``lemmas`` / ``words`` are parsed with
        ``self.parse_array(value, True)`` and lower-cased token by token.
      * ``pos`` / ``ner`` are parsed with ``self.parse_array(value)``.
      * the four span offsets are cast to ``int``.
      * ``subject`` / ``object`` become the space-joined words of their
        spans (``*_begin`` inclusive, ``*_end`` exclusive).
      * ``dependency`` is replaced by
        ``self.parse_dependency(ex.dependency, ex)``.
      * every NER tag inside the subject / object span is overwritten with
        ``ex.subject_ner`` / ``ex.object_ner``.
      * ``relation`` is set to ``None`` if the example has no such attribute.

    Args:
        ex: example whose fields are mutated in place during conversion.

    Returns:
        A new ``Example`` holding only the attributes named in ``self.keep``.
    """
    for field in ['lemmas', 'words']:
        # NOTE(review): meaning of the second positional argument to
        # parse_array is not visible from here; it is passed through as-is.
        tokens = self.parse_array(ex[field], True)
        ex[field] = [w.lower() for w in tokens]

    for field in ['pos', 'ner']:
        ex[field] = self.parse_array(ex[field])

    for field in ['subject_begin', 'subject_end',
                  'object_begin', 'object_end']:
        ex[field] = int(ex[field])

    ex.subject = ' '.join(ex.words[ex.subject_begin:ex.subject_end])
    ex.object = ' '.join(ex.words[ex.object_begin:ex.object_end])
    ex.dependency = self.parse_dependency(ex.dependency, ex)

    # ``range`` rather than the Python-2-only ``xrange`` so this runs on
    # Python 3 as well; the spans are short, so the list built on Python 2
    # is harmless.
    for i in range(ex.subject_begin, ex.subject_end):
        ex.ner[i] = ex.subject_ner
    for i in range(ex.object_begin, ex.object_end):
        ex.ner[i] = ex.object_ner

    if not hasattr(ex, 'relation'):
        ex.relation = None

    kept = {k: v for k, v in ex.__dict__.items() if k in self.keep}
    return Example(**kept)
def featurize(self, ex, add=False):
    """Encode ``ex`` as a word-id sequence with both entities collapsed.

    The subject and object spans are each replaced by a single placeholder
    token (their NER tag). When ``self.scope`` is positive the sequence is
    cropped to at most ``scope`` tokens before the first placeholder and
    ``scope`` tokens after the second one.

    Args:
        ex: example providing ``words``, the subject/object span offsets and
            NER tags, and optionally ``relation``.
        add: forwarded to every ``vocab.get`` call.

    Returns:
        An ``Example`` with ``relation``, ``subject_ner``, ``object_ner``,
        ``orig``, ``sequence``, ``subject_pos``, ``object_pos`` and
        ``length``. ``ex.length`` is set to the same length as a side effect.

    Raises:
        NoPathException: if the start of one entity lies inside the span of
            the other.
    """
    def isbetween(x, start, end):
        return start <= x < end

    if (isbetween(ex.subject_begin, ex.object_begin, ex.object_end)
            or isbetween(ex.object_begin, ex.subject_begin, ex.subject_end)):
        raise NoPathException(str(ex))

    # Order the two entities by where they start in the sentence.
    if ex.subject_begin < ex.object_begin:
        first, second = 'subject', 'object'
    else:
        first, second = 'object', 'subject'

    chunk0 = ex.words[:ex[first + '_begin']]
    chunk1 = chunk0 + [ex[first + '_ner']]
    chunk2 = chunk1 + ex.words[ex[first + '_end']:ex[second + '_begin']]
    sequence = chunk2 + [ex[second + '_ner']] + ex.words[ex[second + '_end']:]

    # Indices of the two placeholders inside ``sequence``.
    first_pos = len(chunk0)
    second_pos = len(chunk2)

    if self.scope > 0:
        start = max(0, first_pos - self.scope)
        end = min(len(sequence), second_pos + self.scope + 1)
        sequence = sequence[start:end]
        # The crop drops ``start`` leading tokens, so the placeholder indices
        # must shift by the same amount to keep pointing at the placeholders
        # in the cropped sequence.
        first_pos -= start
        second_pos -= start

    feat = Example(**{
        'relation': self.vocab['rel'].get(ex.relation, add=add) if ex.relation else None,
        'subject_ner': self.vocab['ner'].get(ex.subject_ner, add=add),
        'object_ner': self.vocab['ner'].get(ex.object_ner, add=add),
        'orig': ex,
        'sequence': [self.vocab['word'].get(w, add=add) for w in sequence],
        'subject_pos': first_pos if first == 'subject' else second_pos,
        'object_pos': first_pos if first == 'object' else second_pos,
    })
    ex.length = feat.length = len(feat.sequence)
    return feat
def to_example(self, row):
    """Build an ``Example`` from a raw row, then run ``convert_types``.

    Cells are keyed by ``self.headers`` in positional order. The
    ``dependency``, ``dep_extra`` and ``dep_malt`` fields have their escaped
    newline and tab sequences (backslash-n, backslash-t) replaced by the real
    characters before conversion.

    Raises:
        AssertionError: if ``row`` and ``self.headers`` differ in length.
    """
    column_names = self.headers
    assert len(row) == len(column_names), (
        "could not convert row to example %s\n%s" % (row, column_names))

    ex = Example(**dict(zip(column_names, row)))

    # Applied in this order: newline escape first, then tab escape.
    escapes = (("\\n", "\n"), ("\\t", "\t"))
    for key in ('dependency', 'dep_extra', 'dep_malt'):
        text = ex[key]
        for literal, char in escapes:
            text = text.replace(literal, char)
        ex[key] = text

    return self.convert_types(ex)
def simulate(self, max_turns=None, verbose=False):
    '''
    Run the sessions against each other and return the dialogue as an Example.

    A random coin flip picks which agent may produce the first event. Each
    produced event is timestamped, passed to ``self.event_callback``,
    recorded in ``self.events``, printed, and then forwarded to every other
    session. The loop stops as soon as ``self.game_over()`` is true or, when
    ``max_turns`` is truthy, once that many events have been produced; the
    event that ends the dialogue is not forwarded.
    '''
    self.events = []
    self.max_turns = max_turns
    clock = 0
    turns_taken = 0
    self.describe_scenario()

    first_speaker = 0 if random.random() < 0.5 else 1

    finished = False
    while not finished:
        for agent, session in enumerate(self.sessions):
            # Until the first event exists, only the chosen opener may speak.
            if turns_taken == 0 and agent != first_speaker:
                continue

            event = session.send()
            clock += 1  # advances on every send attempt, even an empty one
            if not event:
                continue

            event.time = clock
            self.event_callback(event)
            self.events.append(event)

            if verbose:
                print('agent=%s: session=%s, event=%s' % (
                    agent, type(session).__name__, event.to_dict()))
            else:
                action, data = event.action, event.data
                if action == 'message':
                    event_output = data
                else:
                    event_output = "Action: {0}, Data: {1}".format(action, data)
                print('agent=%s, event=%s' % (agent, event_output))

            turns_taken += 1
            if self.game_over() or (max_turns and turns_taken >= max_turns):
                finished = True
                break

            for partner, other_session in enumerate(self.sessions):
                if partner != agent:
                    other_session.receive(event)

    uuid = generate_uuid('E')
    outcome = self.get_outcome()
    if verbose:
        print('outcome: %s' % outcome)
        print('----------------')

    # TODO: add configurable names to systems and sessions
    agent_names = {
        '0': self.session_names[0],
        '1': self.session_names[1],
    }
    return Example(self.scenario, uuid, self.events, outcome, uuid,
                   agent_names)
def featurize(self, ex, add=False):
    """Encode ``ex`` by the dependency path between its two entities.

    Args:
        ex: example providing ``dependency``, the subject/object span
            offsets and NER tags, and optionally ``relation``.
        add: forwarded to every ``vocab.get`` call.

    Returns:
        An ``Example`` with ``relation``, ``subject_ner``, ``object_ner``,
        ``dependency`` (the result of ``get_path_from_parse``) and ``orig``.

    Raises:
        NoPathException: if ``ex`` has no dependency parse, or if the path
            returned for the two entity spans is empty.
    """
    if not ex.dependency:
        # no dependency parse
        raise NoPathException(str(ex))

    # Lookups stay in this order: with add=True they may grow the vocab.
    if ex.relation:
        relation_id = self.vocab['rel'].get(ex.relation, add=add)
    else:
        relation_id = None
    subject_ner_id = self.vocab['ner'].get(ex.subject_ner, add=add)
    object_ner_id = self.vocab['ner'].get(ex.object_ner, add=add)

    parse = DependencyParse(ex.dependency, enhanced=True)
    path = parse.get_path_from_parse(
        ex.subject_begin, ex.subject_end, ex.object_begin, ex.object_end)

    feat = Example(
        relation=relation_id,
        subject_ner=subject_ner_id,
        object_ner=object_ner_id,
        dependency=path,
        orig=ex,
    )

    if not feat.dependency:
        # no shortest path between entities
        raise NoPathException(str(ex))
    return feat
def corrupt(self, feat, add=False):
    """Return a corrupted deep copy of ``feat`` with one sequence item removed.

    The copy is flagged with ``corrupt = True``, loses one uniformly chosen
    element of ``sequence``, has its ``relation`` set to the id returned by
    ``self.vocab['rel'].add('no_relation')``, and has ``length`` refreshed.
    ``feat`` itself is not modified.

    Args:
        feat: featurized example exposing a ``sequence`` attribute.
        add: unused by this method.

    Returns:
        The corrupted ``Example``, or ``None`` when nothing would remain,
        i.e. the input sequence has zero or one element.
    """
    # An empty sequence has nothing to drop, and np.random.randint(0, 0)
    # would raise ValueError; report it the same way as "nothing left".
    if len(feat.sequence) == 0:
        return None

    corrupted = Example(**deepcopy(feat.__dict__))
    corrupted.corrupt = True

    # randomly drop a node
    drop = np.random.randint(0, len(corrupted.sequence))
    # Slicing past the end yields an empty list, so the last index needs no
    # special case.
    corrupted.sequence = (corrupted.sequence[:drop]
                          + corrupted.sequence[drop + 1:])

    corrupted.relation = self.vocab['rel'].add('no_relation')
    corrupted.length = len(corrupted.sequence)
    return corrupted if corrupted.length else None
def simulate(self, max_turns=100):
    '''Simulate the dialogue.

    In each pass every session is asked to send an event. A produced event
    is timestamped, appended to ``self.events``, printed, and forwarded to
    every other session; a ``'select'`` event also stores its data in
    ``self.selections`` for the sending agent. When ``self.game_over()``
    becomes true the reward is set to 1 and the pass is cut short. Reaching
    ``max_turns`` does not interrupt the current pass; it only stops the
    simulation once that pass has finished.

    Args:
        max_turns: number of produced events after which the simulation
            stops.

    Returns:
        ``Example(self.scenario, uuid, self.events, outcome, uuid, None)``
        where ``outcome`` is ``{'reward': self.reward}``.
    '''
    self.events = []
    time = 0
    self.selections = [None, None]
    self.reward = 0
    num_turns = 0
    timeup = False
    while True:
        for agent, session in enumerate(self.sessions):
            event = session.send()
            time += 1  # advances on every send attempt, even an empty one
            if not event:
                continue
            event.time = time
            self.events.append(event)
            if event.action == 'select':
                self.selections[agent] = event.data

            # Single-argument call form: prints the same text on Python 2
            # and is valid syntax on Python 3.
            print('agent=%s: session=%s, event=%s' % (
                agent, type(session).__name__, event.to_dict()))

            num_turns += 1
            if num_turns >= max_turns:
                timeup = True

            for partner, other_session in enumerate(self.sessions):
                if agent != partner:
                    other_session.receive(event)

            # Game is over when the two selections are the same
            if self.game_over():
                self.reward = 1
                break
        if self.game_over() or timeup:
            break

    uuid = generate_uuid('E')
    outcome = {'reward': self.reward}
    print('outcome: %s' % outcome)
    return Example(self.scenario, uuid, self.events, outcome, uuid, None)
def get_decode_vocab(self, example: Example) -> BaseVocabEntry:
    """Return the vocabulary to decode against for ``example``.

    Delegates to ``example.get_both_ext_vocab()`` and returns its result
    unchanged.
    """
    decode_vocab = example.get_both_ext_vocab()
    return decode_vocab