def _process_query_dict(self, query_dict, valid_keys, kwargs):
    if query_dict is None:
        query_dict = {}
    # fold keyword arguments into the query; bare names get the 'ot:' prefix
    for k, v in kwargs.items():
        if k in valid_keys:
            query_dict[k] = v
        else:
            query_dict['ot:' + k] = v
    nq = len(query_dict)
    if nq == 0:
        if self.use_v1:
            raise ValueError('The property/value pairs for the query should be passed in as keyword arguments')
        return None
    if nq > 1:
        raise NotImplementedError('Currently only searches for one property/value pair are supported')
    k = list(query_dict.keys())[0]
    if k not in valid_keys:
        m = '"{k}" is not a valid search term. Expecting it to be one of the following: {kl}'
        m = m.format(k=k, kl=repr(valid_keys))
        raise ValueError(m)
    v = query_dict[k]
    # coerce non-string values to text, and DOIs to their URL form
    if not is_str_type(v):
        v = UNICODE(v)
    if k == 'ot:studyPublication':
        v = doi2url(v)
    return (k, v)
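# A self-contained sketch of the normalization that _process_query_dict performs,
# useful for seeing the intended input/output shape without a client instance.
# The names _normalize_query and VALID_KEYS are hypothetical stand-ins, not part
# of the API above.
VALID_KEYS = frozenset(['ot:studyId', 'ot:studyPublication'])

def _normalize_query(**kwargs):
    query_dict = {}
    for k, v in kwargs.items():
        # bare keyword arguments are treated as 'ot:'-prefixed properties
        query_dict[k if k in VALID_KEYS else 'ot:' + k] = v
    if len(query_dict) != 1:
        raise ValueError('exactly one property/value pair is expected')
    k, v = next(iter(query_dict.items()))
    if k not in VALID_KEYS:
        raise ValueError('"{}" is not a valid search term'.format(k))
    return k, str(v)

print(_normalize_query(studyId=719))  # -> ('ot:studyId', '719')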
def testTreeBaseImport(self):
    fp = pathmap.nexml_source_path('S15515.xml')
    n = get_ot_study_info_from_treebase_nexml(src=fp,
                                              merge_blocks=True,
                                              sort_arbitrary=True)
    # did we successfully coerce its DOI to the required URL form?
    self.assertIn('@href', n['nexml']['^ot:studyPublication'])
    test_doi = n['nexml']['^ot:studyPublication']['@href']
    self.assertEqual(test_doi, doi2url(test_doi))
    # furthermore, the output should exactly match our test file
    expected = pathmap.nexson_obj('S15515.json')
    equal_blob_check(self, 'S15515', n, expected)
    self.assertEqual(expected, n)
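# A hedged illustration of the DOI coercion this test exercises. _doi2url is a
# hypothetical stand-in for peyotl's doi2url, assuming it maps bare DOIs (and
# 'doi:'-prefixed ones) onto a dx.doi.org URL and is a no-op on values that are
# already URLs, which is the idempotence the test asserts.
def _doi2url(v):
    if v.startswith('http'):
        return v  # already a URL, so coercion is idempotent
    if v.startswith('doi:'):
        v = v[len('doi:'):]
    return 'http://dx.doi.org/' + v

assert _doi2url('10.2307/2408428') == 'http://dx.doi.org/10.2307/2408428'
assert _doi2url(_doi2url('doi:10.2307/2408428')) == 'http://dx.doi.org/10.2307/2408428'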
def get_ot_study_info_from_treebase_nexml(src=None,
                                          nexml_content=None,
                                          encoding=u'utf8',
                                          nexson_syntax_version=DEFAULT_NEXSON_VERSION,
                                          merge_blocks=True,
                                          sort_arbitrary=False):
    """Normalize TreeBASE-specific metadata into the locations where
    Open Tree of Life software expects it.

    See get_ot_study_info_from_nexml for the explanation of the src,
    nexml_content, encoding, and nexson_syntax_version arguments.

    If merge_blocks is True then peyotl.manip.merge_otus_and_trees is
    called on the result to merge its otus and trees blocks.

    Actions to "normalize" TreeBASE objects to ot Nexson:
        1. simplify (strip the id from) any meta item that has only a value and an id
        2. throw away rdfs:isDefinedBy
        3. otu @label -> otu ^ot:originalLabel
        4. ^tb:identifier.taxon, ^tb:identifier.taxonVariant and some
           skos:closeMatch fields to ^ot:taxonLink
        5. remove "@xml:base"
        6. coerce edge lengths to native types
    """
    # pylint: disable=R0915
    raw = get_ot_study_info_from_nexml(src=src,
                                       nexml_content=nexml_content,
                                       encoding=encoding,
                                       nexson_syntax_version=BY_ID_HONEY_BADGERFISH)
    nexml = raw['nexml']
    SKOS_ALT_LABEL = '^skos:altLabel'
    SKOS_CLOSE_MATCH = '^skos:closeMatch'
    strippable_pre = {
        'http://www.ubio.org/authority/metadata.php?lsid=urn:lsid:ubio.org:namebank:': '@ubio',
        'http://purl.uniprot.org/taxonomy/': '@uniprot',
    }
    moveable2taxon_link = {"^tb:identifier.taxon": '@tb:identifier.taxon',
                           "^tb:identifier.taxonVariant": '@tb:identifier.taxonVariant', }
    to_del = ['^rdfs:isDefinedBy', '@xml:base']
    for tag in to_del:
        if tag in nexml:
            del nexml[tag]
    _simplify_all_meta_by_id_del(nexml)
    _otu2label = {}
    prefix_map = {}
    # compose dataDeposit
    nexid = nexml['@id']
    tb_url = 'http://purl.org/phylo/treebase/phylows/study/TB2:' + nexid
    nexml['^ot:dataDeposit'] = {'@href': tb_url}
    # compose publication reference, DOI, and year
    bd = nexml.get("^dcterms:bibliographicCitation")
    if bd:
        nexml['^ot:studyPublicationReference'] = bd
    doi = nexml.get('^prism:doi')
    if doi:
        doi = doi2url(doi)
        nexml['^ot:studyPublication'] = {'@href': doi}
    year = nexml.get('^prism:publicationDate')
    if year:
        try:
            nexml['^ot:studyYear'] = int(year)
        except (TypeError, ValueError):
            pass
    #
    for otus in nexml['otusById'].values():
        for tag in to_del:
            if tag in otus:
                del otus[tag]
        _simplify_all_meta_by_id_del(otus)
        for oid, otu in otus['otuById'].items():
            for tag in to_del:
                if tag in otu:
                    del otu[tag]
            _simplify_all_meta_by_id_del(otu)
            # move the TreeBASE label to ot:originalLabel
            label = otu['@label']
            _otu2label[oid] = label
            otu['^ot:originalLabel'] = label
            del otu['@label']
            al = otu.get(SKOS_ALT_LABEL)
            if al is not None:
                if otu.get('^ot:altLabel') is None:
                    otu['^ot:altLabel'] = al
                del otu[SKOS_ALT_LABEL]
            tl = {}
            scm = otu.get(SKOS_CLOSE_MATCH)
            # _LOG.debug('scm = ' + str(scm))
            if scm:
                if isinstance(scm, dict):
                    h = scm.get('@href')
                    if h:
                        try:
                            for p, t in strippable_pre.items():
                                if h.startswith(p):
                                    ident = h[len(p):]
                                    tl[t] = ident
                                    del otu[SKOS_CLOSE_MATCH]
                                    prefix_map[t] = p
                        except Exception:
                            pass
                else:
                    nm = []
                    try:
                        for el in scm:
                            h = el.get('@href')
                            if h:
                                found = False
                                for p, t in strippable_pre.items():
                                    if h.startswith(p):
                                        ident = h[len(p):]
                                        tl[t] = ident
                                        found = True
                                        prefix_map[t] = p
                                        break
                                if not found:
                                    nm.append(el)
                    except Exception:
                        pass
                    if len(nm) < len(scm):
                        if len(nm) > 1:
                            otu[SKOS_CLOSE_MATCH] = nm
                        elif len(nm) == 1:
                            otu[SKOS_CLOSE_MATCH] = nm[0]
                        else:
                            del otu[SKOS_CLOSE_MATCH]
            # _LOG.debug('tl =' + str(tl))
            for k, t in moveable2taxon_link.items():
                al = otu.get(k)
                if al:
                    tl[t] = al
                    del otu[k]
            if tl:
                otu['^ot:taxonLink'] = tl
    for trees in nexml['treesById'].values():
        for tag in to_del:
            if tag in trees:
                del trees[tag]
        _simplify_all_meta_by_id_del(trees)
        for tree in trees['treeById'].values():
            for tag in to_del:
                if tag in tree:
                    del tree[tag]
            _simplify_all_meta_by_id_del(tree)
            # coerce edge lengths to int or float based on the tree type
            tt = tree.get('@xsi:type', 'nex:FloatTree')
            if tt.lower() == 'nex:inttree':
                e_len_coerce = int
            else:
                e_len_coerce = float
            for edge_d in tree['edgeBySourceId'].values():
                for edge in edge_d.values():
                    try:
                        x = e_len_coerce(edge['@length'])
                        edge['@length'] = x
                    except (KeyError, TypeError, ValueError):
                        pass
            # drop node labels that merely repeat the otu's original label
            for node in tree['nodeById'].values():
                nl = node.get('@label')
                if nl:
                    no = node.get('@otu')
                    if no and _otu2label[no] == nl:
                        del node['@label']
    if prefix_map:
        nexml['^ot:taxonLinkPrefixes'] = prefix_map
    if merge_blocks:
        from peyotl.manip import merge_otus_and_trees
        merge_otus_and_trees(raw)
    if nexson_syntax_version != BY_ID_HONEY_BADGERFISH:
        convert_nexson_format(raw,
                              nexson_syntax_version,
                              current_format=BY_ID_HONEY_BADGERFISH,
                              sort_arbitrary=sort_arbitrary)
    elif sort_arbitrary:
        sort_arbitrarily_ordered_nexson(raw)
    return raw
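# A hedged usage sketch for the converter above, assuming it is exported from
# peyotl.nexson_syntax and that 'S15515.xml' is a local TreeBASE NeXML dump
# (both are assumptions; adjust the import and path to your checkout).
from peyotl.nexson_syntax import get_ot_study_info_from_treebase_nexml

blob = get_ot_study_info_from_treebase_nexml(src='S15515.xml',
                                             merge_blocks=True,
                                             sort_arbitrary=True)
# after normalization the DOI, if any, is in URL form under ^ot:studyPublication
print(blob['nexml'].get('^ot:studyPublication'))
print(blob['nexml']['^ot:dataDeposit'])  # the TreeBASE PhyloWS URL composed above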
def __init__(self, obj, errors=None, **kwargs):
    if errors is None:
        errors = []
    try:
        # Python 2.x
        string_types = (str, unicode)
    except NameError:
        # Python 3
        string_types = (str,)
    self.required_toplevel_elements = {
        # N.B. anyjson might parse a text element as str or unicode,
        # depending on its value. Either is fine here.
        'curator': dict,
        'date_created': string_types,
        'taxa': list,
        'user_agent': string_types,
    }
    self.optional_toplevel_elements = {
        'id': string_types,  # not present in initial request
        'study_id': string_types,
        'new_ottids_required': int,  # provided by some agents
    }
    # track unknown keys in top-level object
    uk = None
    for k in obj.keys():
        if (k not in self.required_toplevel_elements.keys()
                and k not in self.optional_toplevel_elements.keys()):
            if uk is None:
                uk = []
            uk.append(k)
    if uk:
        uk.sort()
        # self._warn_event(_NEXEL.TOP_LEVEL,
        #                  obj=obj,
        #                  err_type=gen_UnrecognizedKeyWarning,
        #                  anc=_EMPTY_TUPLE,
        #                  obj_nex_id=None,
        #                  key_list=uk)
        errors.append("Found these unexpected top-level properties: {k}".format(k=uk))
    # test for existence and types of all required elements
    for el_key, el_type in self.required_toplevel_elements.items():
        test_el = obj.get(el_key, None)
        try:
            assert test_el is not None
        except AssertionError:
            errors.append("Property '{p}' not found!".format(p=el_key))
        try:
            assert isinstance(test_el, el_type)
        except AssertionError:
            errors.append("Property '{p}' should be one of these: {t}".format(p=el_key, t=el_type))
    # test a non-empty id against our expected pattern
    self._id = obj.get('id')
    if self._id and isinstance(self._id, string_types):
        try:
            from peyotl.amendments import AMENDMENT_ID_PATTERN
            assert bool(AMENDMENT_ID_PATTERN.match(self._id))
        except Exception:
            errors.append("The top-level amendment 'id' provided is not valid")
    # test a non-empty curator for expected 'login' and 'name' fields
    self._curator = obj.get('curator')
    if isinstance(self._curator, dict):
        for k in self._curator.keys():
            try:
                assert k in ['login', 'name', 'email', ]
            except AssertionError:
                errors.append("Unexpected key '{k}' found in curator".format(k=k))
        if 'login' in self._curator:
            try:
                assert isinstance(self._curator.get('login'), string_types)
            except AssertionError:
                errors.append("Curator 'login' should be a string")
        if 'name' in self._curator:
            try:
                assert isinstance(self._curator.get('name'), string_types)
            except AssertionError:
                errors.append("Curator 'name' should be a string")
        if 'email' in self._curator:
            try:
                assert isinstance(self._curator.get('email'), string_types)
            except AssertionError:
                # TODO: Attempt to validate as an email address?
                errors.append("Curator 'email' should be a string (a valid email address)")
    # test for a valid date_created (should be valid ISO 8601)
    self._date_created = obj.get('date_created')
    import dateutil.parser
    try:
        dateutil.parser.parse(self._date_created)
    except Exception:
        errors.append("Property 'date_created' is not a valid ISO date")
    # test for a valid study_id (if it's not an empty string)
    self._study_id = obj.get('study_id')
    if self._study_id and isinstance(self._study_id, string_types):
        from peyotl.phylesystem import STUDY_ID_PATTERN
        try:
            assert bool(STUDY_ID_PATTERN.match(self._study_id))
        except AssertionError:
            errors.append("The 'study_id' provided is not valid")
    # test taxa for required properties, valid types+values, etc.
    self._taxa = obj.get('taxa')
    if isinstance(self._taxa, list):
        # N.B. required property cannot be empty!
        self.required_toplevel_taxon_elements = {
            'name': string_types,
            'name_derivation': string_types,  # from controlled vocabulary
            'sources': list,
        }
        self.optional_toplevel_taxon_elements = {
            'comment': string_types,
            'rank': string_types,  # can be 'no rank'
            'original_label': string_types,
            'adjusted_label': string_types,
            'parent': int,  # the parent taxon's OTT id
            'parent_tag': string_types,
            'tag': object,  # can be anything (int, string, ...)
            'ott_id': int,  # if already assigned
        }
        # N.B. we should reject any unknown keys (not listed above)!
        uk = None
        for taxon in self._taxa:
            for k in taxon.keys():
                if (k not in self.required_toplevel_taxon_elements.keys()
                        and k not in self.optional_toplevel_taxon_elements.keys()):
                    if uk is None:
                        uk = []
                    uk.append(k)
            for el_key, el_type in self.required_toplevel_taxon_elements.items():
                test_el = taxon.get(el_key, None)
                try:
                    assert test_el is not None
                except AssertionError:
                    errors.append("Required taxon property '{p}' not found!".format(p=el_key))
                try:
                    assert isinstance(test_el, el_type)
                except AssertionError:
                    errors.append("Taxon property '{p}' should be one of these: {t}".format(p=el_key, t=el_type))
            # TODO: name_derivation should be one of a limited set of values
            # any optional properties found should also be of the required type(s)
            for el_key, el_type in self.optional_toplevel_taxon_elements.items():
                if el_key in taxon:
                    test_el = taxon.get(el_key, None)
                    try:
                        assert isinstance(test_el, el_type)
                    except AssertionError:
                        errors.append("Taxon property '{p}' should be one of these: {t}".format(p=el_key, t=el_type))
            # each taxon must have either 'parent' or 'parent_tag'!
            try:
                assert ('parent' in taxon) or ('parent_tag' in taxon)
            except AssertionError:
                errors.append("Taxon has neither 'parent' nor 'parent_tag'!")
            # we need at least one source with type and (sometimes) non-empty value
            self.source_types_requiring_value = [
                'Link to online taxonomy',
                'Link (DOI) to publication',
                'Other',
            ]
            self.source_types_not_requiring_value = [
                'The taxon is described in this study',
            ]
            self.source_types_requiring_URL = [
                'Link to online taxonomy',
                'Link (DOI) to publication',
            ]
            valid_source_found = False
            if taxon.get('sources'):  # guard against a missing or empty list
                for s in taxon.get('sources'):
                    s_type = s.get('source_type', None)
                    try:
                        assert (s_type in self.source_types_requiring_value
                                or s_type in self.source_types_not_requiring_value)
                        if s_type in self.source_types_requiring_value:
                            try:
                                # the 'source' (value) field should be a non-empty string
                                assert s.get('source', None)
                                valid_source_found = True
                            except AssertionError:
                                errors.append("Missing value for taxon source of type '{t}'!".format(t=s_type))
                        else:
                            valid_source_found = True
                        if s_type in self.source_types_requiring_URL:
                            try:
                                # its value should contain a URL (ie, conversion does nothing)
                                s_val = s.get('source')
                                assert s_val == doi2url(s_val)
                            except Exception:
                                errors.append("Source '{s}' (of type '{t}') should be a URL!".format(s=s_val, t=s_type))
                    except AssertionError:
                        errors.append("Unknown taxon source type '{t}'!".format(t=s_type))
            if not valid_source_found:
                errors.append("Taxon must have at least one valid source (none found)!")
        if uk:
            uk.sort()
            errors.append("Found these unexpected taxon properties: {k}".format(k=uk))
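# A minimal amendment blob of the shape the __init__ above accepts; every
# concrete value (curator, date, OTT id, agent string, vocabulary term) is
# illustrative only, and the owning validator class is not named because only
# its __init__ is shown here.
example_amendment = {
    'curator': {'login': 'jdoe', 'name': 'Jane Doe'},
    'date_created': '2016-01-04',
    'user_agent': 'example-curation-tool',
    'taxa': [{
        'name': 'Newtaxon exemplaris',
        'name_derivation': 'external taxonomy',  # placeholder for a controlled-vocabulary term
        'parent': 770315,  # hypothetical parent OTT id
        'sources': [{'source_type': 'The taxon is described in this study'}],
    }],
}
# Passing this blob (plus an empty `errors` list) to the __init__ above should
# leave `errors` empty: all required keys are present and correctly typed, the
# taxon has a 'parent', and its single source type needs neither value nor URL.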