def validate_entity_mention_token_ref_sequences(comm):
    """Validate the TokenRefSequence of every EntityMention.

    Args:
    - `comm` (`Communication`)

    Returns:
    - `True` if every EntityMention's token sequence is valid
    """
    valid = True
    for mention_set in lun(comm.entityMentionSetList):
        for mention in lun(mention_set.mentionList):
            # accumulate with &= so every mention is checked and logged,
            # not just the first invalid one
            valid &= validate_token_ref_sequence(comm, mention.tokens)
    return valid
def validate_situations(comm):
    """Validate cross-references of every Situation in a Communication.

    Checks that Situation arguments, justifications and mention IDs all
    reference UUIDs that actually exist in the Communication.

    Args:
    - `comm` (`Communication`)

    Returns:
    - `True` if all Situation cross-references are valid
    """
    valid = True
    entity_uuidString_set = _get_entity_uuidString_set(comm)
    situation_mention_uuidString_set = _get_situation_mention_uuidString_set(
        comm)
    situation_uuidString_set = _get_situation_uuidString_set(comm)
    for situationSet in lun(comm.situationSetList):
        for situation in lun(situationSet.situationList):
            for argument in lun(situation.argumentList):
                if (argument.situationId and
                        argument.situationId.uuidString not in
                        situation_uuidString_set):
                    valid = False
                    logging.error(_ilm(2, (
                        "Argument for Situation '%s' has an invalid"
                        " situationId (%s). Tool='%s'") % (
                            situation.uuid, argument.situationId,
                            situationSet.metadata.tool)))
                if (argument.entityId and
                        argument.entityId.uuidString not in
                        entity_uuidString_set):
                    valid = False
                    logging.error(_ilm(2, (
                        "Argument for Situation '%s' has an invalid entityId"
                        " (%s). Tool='%s'") % (
                            situation.uuid, argument.entityId,
                            situationSet.metadata.tool)))
            for justification in lun(situation.justificationList):
                if (justification.mentionId.uuidString not in
                        situation_mention_uuidString_set):
                    valid = False
                    logging.error(_ilm(2, (
                        "Justification for Situation '%s' has an invalid"
                        " [situation] mentionId (%s). Tool='%s'") % (
                            situation.uuid, justification.mentionId,
                            situationSet.metadata.tool)))
                if justification.tokenRefSeqList:
                    for trs in justification.tokenRefSeqList:
                        valid &= validate_token_ref_sequence(comm, trs)
            for mentionId in lun(situation.mentionIdList):
                if (mentionId.uuidString not in
                        situation_mention_uuidString_set):
                    valid = False
                    logging.error(_ilm(2, (
                        "Situation '%s' has an invalid [situation] mentionId"
                        " (%s). Tool='%s'") % (
                            situation.uuid, mentionId,
                            situationSet.metadata.tool)))
    return valid
def validate_situations(comm):
    """Check that every Situation's references resolve.

    Verifies argument situationId/entityId values, justification
    mention IDs and the situation's own mentionIdList against the sets
    of UUIDs present in the Communication.

    Args:
    - `comm` (`Communication`)

    Returns:
    - `True` if no dangling references were found
    """
    valid = True
    entity_uuidString_set = _get_entity_uuidString_set(comm)
    situation_mention_uuidString_set = _get_situation_mention_uuidString_set(
        comm)
    situation_uuidString_set = _get_situation_uuidString_set(comm)
    for situationSet in lun(comm.situationSetList):
        tool = situationSet.metadata.tool
        for situation in lun(situationSet.situationList):
            for argument in lun(situation.argumentList):
                sid = argument.situationId
                if sid and sid.uuidString not in situation_uuidString_set:
                    valid = False
                    logging.error(_ilm(2, (
                        "Argument for Situation '%s' has an invalid"
                        " situationId (%s). Tool='%s'") %
                        (situation.uuid, sid, tool)))
                eid = argument.entityId
                if eid and eid.uuidString not in entity_uuidString_set:
                    valid = False
                    logging.error(_ilm(2, (
                        "Argument for Situation '%s' has an invalid entityId"
                        " (%s). Tool='%s'") %
                        (situation.uuid, eid, tool)))
            for justification in lun(situation.justificationList):
                if (justification.mentionId.uuidString not in
                        situation_mention_uuidString_set):
                    valid = False
                    logging.error(_ilm(2, (
                        "Justification for Situation '%s' has an invalid"
                        " [situation] mentionId (%s). Tool='%s'") %
                        (situation.uuid, justification.mentionId, tool)))
                if justification.tokenRefSeqList:
                    for tokenRefSeq in justification.tokenRefSeqList:
                        valid &= validate_token_ref_sequence(
                            comm, tokenRefSeq)
            for mentionId in lun(situation.mentionIdList):
                if (mentionId.uuidString not in
                        situation_mention_uuidString_set):
                    valid = False
                    logging.error(_ilm(2, (
                        "Situation '%s' has an invalid [situation] mentionId"
                        " (%s). Tool='%s'") %
                        (situation.uuid, mentionId, tool)))
    return valid
def validate_entity_mention_tokenization_ids(comm):
    """Check that every EntityMention points at a real Tokenization.

    Args:
    - `comm` (`Communication`)

    Returns:
    - `True` if all EntityMention tokenizationIds resolve
    """
    valid = True
    tokenization_uuidString_set = _get_tokenization_uuidString_set(comm)
    for entityMentionSet in lun(comm.entityMentionSetList):
        for entityMention in lun(entityMentionSet.mentionList):
            tkzn_id = entityMention.tokens.tokenizationId
            if tkzn_id.uuidString not in tokenization_uuidString_set:
                valid = False
                logging.error(_ilm(
                    2,
                    "Mention '%s' has an invalid tokenizationId (%s)" %
                    (entityMention.uuid, tkzn_id)))
    return valid
def validate_situation_mentions(comm):
    """Validate all SituationMentions in a Communication.

    Checks each mention's TokenRefSequence, verifies MentionArgument
    cross-references, and enforces that each MentionArgument carries
    exactly one of tokens / entityMentionId / situationMentionId.

    Args:
    - `comm` (`Communication`)

    Returns:
    - `True` if all SituationMentions are valid
    """
    valid = True
    entity_mention_uuidString_set = _get_entity_mention_uuidString_set(comm)
    situation_mention_uuidString_set = _get_situation_mention_uuidString_set(
        comm)
    for situationMentionSet in lun(comm.situationMentionSetList):
        for situationMention in lun(situationMentionSet.mentionList):
            if situationMention.tokens:
                valid &= validate_token_ref_sequence(
                    comm, situationMention.tokens)
            # FIX: wrap argumentList in lun() - every other optional-list
            # loop in this module guards against None this way
            for (m_idx, m_arg) in enumerate(lun(situationMention.argumentList)):
                if (m_arg.entityMentionId and
                        m_arg.entityMentionId.uuidString not in
                        entity_mention_uuidString_set):
                    valid = False
                    logging.error(_ilm(2, (
                        "MentionArgument for SituationMention '%s' has an"
                        " invalid entityMentionId (%s). Tool='%s'") % (
                            situationMention.uuid.uuidString,
                            m_arg.entityMentionId,
                            situationMentionSet.metadata.tool)))
                if (m_arg.situationMentionId and
                        m_arg.situationMentionId.uuidString not in
                        situation_mention_uuidString_set):
                    valid = False
                    logging.error(_ilm(2, (
                        "MentionArgument for SituationMention '%s' has an"
                        " invalid situationMentionId (%s). Tool='%s'") % (
                            situationMention.uuid,
                            m_arg.situationMentionId,
                            situationMentionSet.metadata.tool)))
                # exactly one referent type must be set per argument
                total_args = (bool(m_arg.tokens) +
                              bool(m_arg.entityMentionId) +
                              bool(m_arg.situationMentionId))
                if total_args != 1:
                    valid = False
                    logging.error(_ilm(2, (
                        "MentionArgument #%d for SituationMention '%s'"
                        " should have exactly one EntityMention|"
                        "SituationMention|TokenRefSequence, but found %d") % (
                            m_idx, situationMention.uuid.uuidString,
                            total_args)))
    return valid
def _get_situation_uuidString_set(comm):
    """
    Args:

    - `comm` (`Communication`)

    Returns:

    - set of strings: uuidStrings for all Situations in the Communication
    """
    return set(
        situation.uuid.uuidString
        for situationSet in lun(comm.situationSetList)
        for situation in lun(situationSet.situationList))
def _get_entity_uuidString_set(comm):
    """
    Args:

    - `comm` (`Communication`)

    Returns:

    - set of strings: uuidStrings for all Entities in the Communication
    """
    return set(
        entity.uuid.uuidString
        for entitySet in lun(comm.entitySetList)
        for entity in lun(entitySet.entityList))
def validate_entity_mention_tokenization_ids(comm):
    """Verify that each EntityMention's tokenizationId exists.

    Args:
    - `comm` (`Communication`)

    Returns:
    - `True` if every tokenizationId resolves to a Tokenization
    """
    valid = True
    known_tokenizations = _get_tokenization_uuidString_set(comm)
    for mention_set in lun(comm.entityMentionSetList):
        for mention in lun(mention_set.mentionList):
            if (mention.tokens.tokenizationId.uuidString not in
                    known_tokenizations):
                valid = False
                logging.error(_ilm(
                    2,
                    "Mention '%s' has an invalid tokenizationId (%s)" %
                    (mention.uuid, mention.tokens.tokenizationId)))
    return valid
def validate_entity_mention_ids(comm):
    """Check that every Entity's mention IDs reference real EntityMentions.

    Args:
    - `comm` (`Communication`)

    Returns:
    - `True` if all entityMentionIds resolve
    """
    valid = True
    entity_mention_uuidString_set = _get_entity_mention_uuidString_set(comm)
    for entitySet in lun(comm.entitySetList):
        for entity in lun(entitySet.entityList):
            # FIX: guard mentionIdList with lun() for consistency with the
            # other optional-list loops in this module (avoids a TypeError
            # if the list is unset)
            for entityMentionId in lun(entity.mentionIdList):
                if (entityMentionId.uuidString not in
                        entity_mention_uuidString_set):
                    valid = False
                    logging.error(_ilm(
                        2,
                        "Entity '%s' has an invalid entityMentionId (%s)" %
                        (entity.uuid, entityMentionId)))
    return valid
def validate_situation_mentions(comm):
    """Validate every SituationMention in the Communication.

    Validates token sequences, MentionArgument cross-references, and the
    exactly-one-referent constraint on each MentionArgument.

    Args:
    - `comm` (`Communication`)

    Returns:
    - `True` if all SituationMentions are valid
    """
    valid = True
    entity_mention_uuids = _get_entity_mention_uuidString_set(comm)
    situation_mention_uuids = _get_situation_mention_uuidString_set(comm)
    for sm_set in lun(comm.situationMentionSetList):
        tool = sm_set.metadata.tool
        for sm in lun(sm_set.mentionList):
            if sm.tokens:
                valid &= validate_token_ref_sequence(comm, sm.tokens)
            # FIX: argumentList is wrapped in lun() like every other
            # optional list in this module (avoids TypeError when unset)
            for m_idx, m_arg in enumerate(lun(sm.argumentList)):
                if (m_arg.entityMentionId and
                        m_arg.entityMentionId.uuidString not in
                        entity_mention_uuids):
                    valid = False
                    logging.error(_ilm(2, (
                        "MentionArgument for SituationMention '%s' has an"
                        " invalid entityMentionId (%s). Tool='%s'") %
                        (sm.uuid.uuidString, m_arg.entityMentionId, tool)))
                if (m_arg.situationMentionId and
                        m_arg.situationMentionId.uuidString not in
                        situation_mention_uuids):
                    valid = False
                    logging.error(_ilm(2, (
                        "MentionArgument for SituationMention '%s' has an"
                        " invalid situationMentionId (%s). Tool='%s'") %
                        (sm.uuid, m_arg.situationMentionId, tool)))
                # each MentionArgument must set exactly one referent
                total_args = (bool(m_arg.tokens) +
                              bool(m_arg.entityMentionId) +
                              bool(m_arg.situationMentionId))
                if total_args != 1:
                    valid = False
                    logging.error(_ilm(2, (
                        "MentionArgument #%d for SituationMention '%s'"
                        " should have exactly one EntityMention|"
                        "SituationMention|TokenRefSequence, but found %d") %
                        (m_idx, sm.uuid.uuidString, total_args)))
    return valid
def validate_entity_mention_ids(comm):
    """Verify that each Entity references only existing EntityMentions.

    Args:
    - `comm` (`Communication`)

    Returns:
    - `True` if every entityMentionId resolves to an EntityMention
    """
    valid = True
    known_mention_uuids = _get_entity_mention_uuidString_set(comm)
    for entitySet in lun(comm.entitySetList):
        for entity in lun(entitySet.entityList):
            # FIX: mentionIdList guarded with lun(), matching the module's
            # convention for optional Thrift lists
            for entityMentionId in lun(entity.mentionIdList):
                if entityMentionId.uuidString not in known_mention_uuids:
                    valid = False
                    logging.error(_ilm(
                        2,
                        "Entity '%s' has an invalid entityMentionId (%s)" %
                        (entity.uuid, entityMentionId)))
    return valid
def _get_tokenization_uuidString_set(comm):
    """
    Args:

    - `comm` (`Communication`)

    Returns:

    - set of strings: uuidStrings for all Tokenizations in the Communication
    """
    return set(
        sentence.tokenization.uuid.uuidString
        for section in lun(comm.sectionList)
        for sentence in lun(section.sentenceList)
        if sentence.tokenization)
def _get_sentence_for_tokenization_uuidString_dict(comm):
    """
    Args:

    - `comm` (`Communication`)

    Returns:

    - dictionary mapping of Tokenization uuidStrings to Sentences

    The mapping is built once and memoized on the Communication object.
    """
    if not hasattr(comm, 'sentence_for_tokenization_uuidString_dict'):
        mapping = {}
        for section in lun(comm.sectionList):
            for sentence in lun(section.sentenceList):
                tkzn = sentence.tokenization
                if tkzn:
                    mapping[tkzn.uuid.uuidString] = sentence
        comm.sentence_for_tokenization_uuidString_dict = mapping
    return comm.sentence_for_tokenization_uuidString_dict
def print_communication_taggings_for_communication(comm, tool=None): communication_taggings = _filter_by_tool( lun(comm.communicationTaggingList), tool) for tagging in communication_taggings: print '%s: %s' % ( tagging.taggingType, ' '.join('%s:%.3f' % p for p in zip(tagging.tagList, tagging.confidenceList)) )
def _get_tokenization_uuidString_dict(comm):
    """
    Args:

    - `comm` (`Communication`)

    Returns:

    - dictionary mapping uuidStrings to Tokenizations

    The dictionary is cached on the Communication as
    `_tokenization_uuidString_dict` after the first call.
    """
    if not hasattr(comm, '_tokenization_uuidString_dict'):
        comm._tokenization_uuidString_dict = {}
        for section in lun(comm.sectionList):
            for sentence in lun(section.sentenceList):
                tokenization = sentence.tokenization
                if tokenization:
                    key = tokenization.uuid.uuidString
                    comm._tokenization_uuidString_dict[key] = tokenization
    return comm._tokenization_uuidString_dict
def print_situations(comm, tool=None): """Print information for all Situations and their SituationMentions Args: - `comm`: A Concrete Communication """ for s_set_idx, s_set in enumerate(lun(comm.situationSetList)): if tool is None or s_set.metadata.tool == tool: print u"Situation Set %d (%s):" % (s_set_idx, s_set.metadata.tool) for s_idx, situation in enumerate(s_set.situationList): print u" Situation %d-%d:" % (s_set_idx, s_idx) _p(6, 18, u"situationType", situation.situationType) for sm_idx, sm in enumerate(lun(situation.mentionList)): print u" " * 6 + u"SituationMention %d-%d-%d:" % ( s_set_idx, s_idx, sm_idx) _print_situation_mention(sm) print print
def print_situations(comm): """Print information for all Situations and their SituationMentions Args: - `comm`: A Concrete Communication """ for s_set_idx, s_set in enumerate(lun(comm.situationSetList)): if s_set.metadata: print u"Situation Set %d (%s):" % (s_set_idx, s_set.metadata.tool) else: print u"Situation Set %d:" % s_set_idx for s_idx, situation in enumerate(s_set.situationList): print u" Situation %d-%d:" % (s_set_idx, s_idx) _p(6, 18, u"situationType", situation.situationType) for sm_idx, sm in enumerate(lun(situation.mentionList)): print u" " * 6 + u"SituationMention %d-%d-%d:" % ( s_set_idx, s_idx, sm_idx) _print_situation_mention(sm) print print
def print_situation_mentions(comm, tool=None): """Print information for all SituationMentions (some of which may not have Situations) Args: - `comm`: A Concrete Communication """ for sm_set_idx, sm_set in enumerate(lun(comm.situationMentionSetList)): if tool is None or sm_set.metadata.tool == tool: print u"Situation Set %d (%s):" % (sm_set_idx, sm_set.metadata.tool) for sm_idx, sm in enumerate(sm_set.mentionList): print u" SituationMention %d-%d:" % (sm_set_idx, sm_idx) _print_situation_mention(sm) print print
def validate_communication(comm):
    """
    Args:

    - `comm` (`Communication`)

    Returns:

    - `True` if Communication is valid, `False` otherwise
    """
    valid = True
    logging.info(_ilm(0, "Validating Communication with ID '%s'" % comm.id))
    # structural (Thrift-level) validation first
    valid &= validate_thrift_deep(comm)
    for section in lun(comm.sectionList):
        valid &= validate_token_offsets_for_section(section)
        if section.sentenceList:
            logging.debug(_ilm(4, "section '%s' has %d sentences" % (
                section.uuid, len(section.sentenceList))))
            for sentence in section.sentenceList:
                valid &= validate_token_offsets_for_sentence(sentence)
                if sentence.tokenization:
                    valid &= validate_constituency_parses(
                        comm, sentence.tokenization)
                    valid &= validate_dependency_parses(
                        sentence.tokenization)
                    valid &= validate_token_taggings(sentence.tokenization)
    # cross-reference validation over the whole Communication
    valid &= validate_entity_mention_ids(comm)
    valid &= validate_entity_mention_tokenization_ids(comm)
    valid &= validate_entity_mention_token_ref_sequences(comm)
    valid &= validate_situations(comm)
    valid &= validate_situation_mentions(comm)
    if valid:
        logging.info(_ilm(
            0, "The Communication with ID '%s' is valid" % comm.id))
    else:
        logging.error(_ilm(
            0, "The Communication with ID '%s' IS NOT valid" % comm.id))
    return valid
def validate_communication(comm):
    """
    Args:

    - `comm` (`Communication`)

    Returns:

    - `True` if Communication is valid, `False` otherwise
    """
    valid = True
    logging.info(_ilm(0, "Validating Communication with ID '%s'" % comm.id))
    valid &= validate_thrift_deep(comm)
    for section in lun(comm.sectionList):
        valid &= validate_token_offsets_for_section(section)
        if not section.sentenceList:
            continue
        logging.debug(_ilm(4, "section '%s' has %d sentences" % (
            section.uuid, len(section.sentenceList))))
        for sentence in section.sentenceList:
            valid &= validate_token_offsets_for_sentence(sentence)
            tkzn = sentence.tokenization
            if tkzn:
                valid &= validate_constituency_parses(comm, tkzn)
                valid &= validate_dependency_parses(tkzn)
                valid &= validate_token_taggings(tkzn)
    # Communication-wide cross-reference checks
    valid &= validate_entity_mention_ids(comm)
    valid &= validate_entity_mention_tokenization_ids(comm)
    valid &= validate_entity_mention_token_ref_sequences(comm)
    valid &= validate_situations(comm)
    valid &= validate_situation_mentions(comm)
    if not valid:
        logging.error(_ilm(
            0, "The Communication with ID '%s' IS NOT valid" % comm.id))
    else:
        logging.info(_ilm(
            0, "The Communication with ID '%s' is valid" % comm.id))
    return valid
def validate_token_offsets_for_section(section):
    """
    Test if the TextSpan boundaries for all sentences in a section fall
    within the boundaries of the section's TextSpan
    """
    valid = True
    if section.textSpan is None:
        # nothing to check without a section span
        return valid
    sec_start = section.textSpan.start
    sec_end = section.textSpan.ending
    if sec_start > sec_end:
        valid = False
        logging.error(_ilm(2, (
            "Section '%s' has a TextSpan with a start offset (%d) > end"
            " offset (%d)") % (section.uuid, sec_start, sec_end)))
    for sentence in lun(section.sentenceList):
        if sentence.textSpan is None:
            continue
        sen_start = sentence.textSpan.start
        sen_end = sentence.textSpan.ending
        if sen_start > sen_end:
            valid = False
            logging.error(_ilm(2, (
                "Sentence '%s' has a TextSpan with a start offset (%d) > end"
                " offset (%d)") % (sentence.uuid, sen_start, sen_end)))
        elif (sen_start < sec_start or sen_start > sec_end or
                sen_end < sec_start or sen_end > sec_end):
            # sentence span must be fully contained in the section span
            valid = False
            logging.error(_ilm(2, (
                "Sentence '%s' in Section '%s' has a TextSpan [%d, %d] that"
                " does not fit within the Section TextSpan [%d, %d]") % (
                    sentence.uuid, section.uuid, sen_start, sen_end,
                    sec_start, sec_end)))
    return valid
def print_sections(comm): """Print information for all Sections, according to their spans. Args: - `comm`: A Concrete Communication """ text = comm.text for sect_idx, sect in enumerate(lun(comm.sectionList)): ts = sect.textSpan if ts is None: print u"Section %s does not have a textSpan " "field set" % (sect.uuid.uuidString) continue print u"Section %d (%s), from %d to %d:" % ( sect_idx, sect.uuid.uuidString, ts.start, ts.ending) print u"%s" % (text[ts.start:ts.ending]) print print
def print_situation_mentions(comm): """Print information for all SituationMentions (some of which may not have Situations) Args: - `comm`: A Concrete Communication """ for sm_set_idx, sm_set in enumerate(lun(comm.situationMentionSetList)): if sm_set.metadata: print u"Situation Set %d (%s):" % (sm_set_idx, sm_set.metadata.tool) else: print u"Situation Set %d:" % sm_set_idx for sm_idx, sm in enumerate(sm_set.mentionList): print u" SituationMention %d-%d:" % (sm_set_idx, sm_idx) _print_situation_mention(sm) print print
def validate_token_offsets_for_section(section):
    """
    Test if the TextSpan boundaries for all sentences in a section fall
    within the boundaries of the section's TextSpan
    """
    valid = True
    span = section.textSpan
    if span is None:
        return valid
    if span.start > span.ending:
        valid = False
        logging.error(_ilm(2, (
            "Section '%s' has a TextSpan with a start offset (%d) > end"
            " offset (%d)") % (section.uuid, span.start, span.ending)))
    for sentence in lun(section.sentenceList):
        s_span = sentence.textSpan
        if s_span is None:
            continue
        if s_span.start > s_span.ending:
            valid = False
            logging.error(_ilm(2, (
                "Sentence '%s' has a TextSpan with a start offset (%d) > end"
                " offset (%d)") % (sentence.uuid, s_span.start,
                                   s_span.ending)))
        else:
            # every sentence offset must lie inside the section span
            inside = (span.start <= s_span.start <= span.ending and
                      span.start <= s_span.ending <= span.ending)
            if not inside:
                valid = False
                logging.error(_ilm(2, (
                    "Sentence '%s' in Section '%s' has a TextSpan [%d, %d]"
                    " that does not fit within the Section TextSpan"
                    " [%d, %d]") % (
                        sentence.uuid, section.uuid, s_span.start,
                        s_span.ending, span.start, span.ending)))
    return valid
def print_sections(comm, tool=None): """Print information for all Sections, according to their spans. Args: - `comm`: A Concrete Communication """ if tool is None or comm.metadata.tool == tool: text = comm.text for sect_idx, sect in enumerate(lun(comm.sectionList)): ts = sect.textSpan if ts is None: print u"Section %s does not have a textSpan " "field set" % (sect.uuid.uuidString) continue print u"Section %d (%s), from %d to %d:" % ( sect_idx, sect.uuid.uuidString, ts.start, ts.ending) print u"%s" % (text[ts.start:ts.ending]) print print
def get_entityMentions_by_tokenizationId(comm):
    """Get entity mentions for a Communication grouped by Tokenization
    UUID string

    Args:

    - `comm`: A Concrete Communication object

    Returns:

    - A dictionary of lists of EntityMentions, where the dictionary
      keys are Tokenization UUID strings.
    """
    mentions_by_tkzn_id = defaultdict(list)
    for entitySet in lun(comm.entitySetList):
        # FIX: entityList and mentionList guarded with lun() like every
        # other optional Thrift list in this module (avoids TypeError
        # when a list is unset)
        for entity in lun(entitySet.entityList):
            for entityMention in lun(entity.mentionList):
                u = entityMention.tokens.tokenizationId.uuidString
                mentions_by_tkzn_id[u].append(entityMention)
    return mentions_by_tkzn_id
def _get_entityMentions_by_tokenizationId(comm, tool=None):
    """Get entity mentions for a Communication grouped by Tokenization
    UUID string

    Args:

    - `comm`: A Concrete Communication object
    - `tool`: if given, only include mentions whose mention set was
      produced by this tool

    Returns:

    - A dictionary of lists of EntityMentions, where the dictionary
      keys are Tokenization UUID strings.
    """
    mentions_by_tkzn_id = defaultdict(list)
    for entitySet in lun(comm.entitySetList):
        # FIX: entityList and mentionList guarded with lun() for
        # consistency with the rest of this module
        for entity in lun(entitySet.entityList):
            for entityMention in lun(entity.mentionList):
                # NOTE(review): assumes EntityMention carries an
                # `entityMentionSet` back-reference with metadata --
                # confirm this attribute exists on the concrete type
                if (tool is None or
                        entityMention.entityMentionSet.metadata.tool ==
                        tool):
                    u = entityMention.tokens.tokenizationId.uuidString
                    mentions_by_tkzn_id[u].append(entityMention)
    return mentions_by_tkzn_id
def _print_situation_mention(situationMention): """Helper function for printing info for a SituationMention""" if situationMention.text: _p(10, 20, u"text", situationMention.text) if situationMention.situationType: _p(10, 20, u"situationType", situationMention.situationType) for arg_idx, ma in enumerate(lun(situationMention.argumentList)): print u" " * 10 + u"Argument %d:" % arg_idx if ma.role: _p(14, 16, u"role", ma.role) if ma.entityMention: _p(14, 16, u"entityMention", u" ".join(_get_tokens_for_entityMention(ma.entityMention))) # A SituationMention can have an argumentList with a # MentionArgument that points to another SituationMention--- # which could conceivably lead to loops. We currently don't # traverse the list recursively, instead looking at only # SituationMentions referenced by top-level SituationMentions if ma.situationMention: print u" " * 14 + u"situationMention:" if situationMention.text: _p(18, 20, u"text", situationMention.text) if situationMention.situationType: _p(18, 20, u"situationType", situationMention.situationType)
def _print_situation_mention(situationMention): """Helper function for printing info for a SituationMention""" if situationMention.text: _p(10, 20, u"text", situationMention.text) if situationMention.situationType: _p(10, 20, u"situationType", situationMention.situationType) for arg_idx, ma in enumerate(lun(situationMention.argumentList)): print u" " * 10 + u"Argument %d:" % arg_idx if ma.role: _p(14, 16, u"role", ma.role) if ma.entityMention: _p(14, 16, u"entityMention", u" ".join(get_tokens_for_entityMention(ma.entityMention))) # A SituationMention can have an argumentList with a # MentionArgument that points to another SituationMention--- # which could conceivably lead to loops. We currently don't # traverse the list recursively, instead looking at only # SituationMentions referenced by top-level SituationMentions if ma.situationMention: print u" " * 14 + u"situationMention:" if situationMention.text: _p(18, 20, u"text", situationMention.text) if situationMention.situationType: _p(18, 20, u"situationType", situationMention.situationType)
def get_comm_tokenizations(comm, tool=None):
    """Generate the Tokenization of each Sentence in a Communication.

    Args:

    - `comm`: A Concrete Communication
    - `tool`: if given, only yield Tokenizations produced by this tool

    Yields:

    - Tokenization objects (may yield None for untokenized sentences
      when `tool` is None, matching the unfiltered behavior)
    """
    for section in lun(comm.sectionList):
        for sentence in lun(section.sentenceList):
            if tool is None:
                yield sentence.tokenization
            # FIX: guard against sentence.tokenization being None before
            # dereferencing .metadata.tool (previously raised
            # AttributeError when filtering by tool)
            elif (sentence.tokenization and
                    sentence.tokenization.metadata.tool == tool):
                yield sentence.tokenization
def validate_entity_mention_token_ref_sequences(comm):
    """Check the token sequence of every EntityMention in `comm`.

    Args:
    - `comm` (`Communication`)

    Returns:
    - `True` if all EntityMention TokenRefSequences are valid
    """
    valid = True
    for entityMentionSet in lun(comm.entityMentionSetList):
        for entityMention in lun(entityMentionSet.mentionList):
            # check every mention; &= keeps iterating after a failure
            valid &= validate_token_ref_sequence(comm,
                                                 entityMention.tokens)
    return valid
def print_metadata(comm, tool=None): """Print metadata for tools used to annotate Communication """ def _get_tokenizations(comm): tokenizations = [] if comm.sectionList: for section in comm.sectionList: if section.sentenceList: for sentence in section.sentenceList: if sentence.tokenization: tokenizations.append(sentence.tokenization) return tokenizations if tool is None or comm.metadata.tool == tool: print u"Communication: %s\n" % comm.metadata.tool dependency_parse_tools = set() parse_tools = set() tokenization_tools = set() token_tagging_tools = set() for tokenization in _get_tokenizations(comm): tokenization_tools.add(tokenization.metadata.tool) if tokenization.tokenTaggingList: for tokenTagging in tokenization.tokenTaggingList: token_tagging_tools.add(tokenTagging.metadata.tool) if tokenization.dependencyParseList: for dependencyParse in tokenization.dependencyParseList: dependency_parse_tools.add(dependencyParse.metadata.tool) if tokenization.parseList: for parse in tokenization.parseList: parse_tools.add(parse.metadata.tool) communication_tagging_tools = set() for communication_tagging in lun(comm.communicationTaggingList): communication_tagging_tools.add(communication_tagging.metadata.tool) if tool is not None: dependency_parse_tools = dependency_parse_tools.intersection([tool]) parse_tools = parse_tools.intersection([tool]) tokenization_tools = tokenization_tools.intersection([tool]) token_tagging_tools = token_tagging_tools.intersection([tool]) communication_tagging_tools = communication_tagging_tools.intersection( [tool]) if tokenization_tools: for toolname in sorted(tokenization_tools): print u" Tokenization: %s" % toolname print if dependency_parse_tools: for toolname in sorted(dependency_parse_tools): print u" Dependency Parse: %s" % toolname print if parse_tools: for toolname in sorted(parse_tools): print u" Parse: %s" % toolname print if token_tagging_tools: for toolname in sorted(token_tagging_tools): print u" TokenTagging: %s" % toolname print if 
comm.entityMentionSetList: for i, em_set in enumerate(comm.entityMentionSetList): if tool is None or em_set.metadata.tool == tool: print u" EntityMentionSet #%d: %s" % ( i, em_set.metadata.tool) print if comm.entitySetList: for i, entitySet in enumerate(comm.entitySetList): if tool is None or entitySet.metadata.tool == tool: print u" EntitySet #%d: %s" % ( i, entitySet.metadata.tool) print if comm.situationMentionSetList: for i, sm_set in enumerate(comm.situationMentionSetList): if tool is None or sm_set.metadata.tool == tool: print u" SituationMentionSet #%d: %s" % ( i, sm_set.metadata.tool) print if comm.situationSetList: for i, situationSet in enumerate(comm.situationSetList): if tool is None or situationSet.metadata.tool == tool: print u" SituationSet #%d: %s" % ( i, situationSet.metadata.tool) print if communication_tagging_tools: for toolname in sorted(communication_tagging_tools): print u" CommunicationTagging: %s" % toolname print