def test_generate_ind_template(self):
    # Generates the individual template from the mouse nomenclature table,
    # reads the TSV back and spot-checks a few rows.
    # NOTE(review): `read_tsv` is assumed to return a container keyed by the
    # row ID whose values are indexable rows -- confirm against its definition.
    generate_ind_template(PATH_MOUSE_NOMENCLATURE, PATH_GENERIC_OUTPUT_TSV)
    output = read_tsv(PATH_GENERIC_OUTPUT_TSV)

    # Positions of the inspected columns within a parsed row.
    _label = 2
    _description = 18
    _aliases = 19
    _rank = 20

    gabaergic_id = "PCL:" + get_individual_id("CS202002013_123")
    self.assertIn(gabaergic_id, output)
    test_node = output[gabaergic_id]
    self.assertEqual("GABAergic", str(test_node[_label]))
    self.assertTrue(
        str(test_node[_description]).startswith(
            "GABAergic is: Neurons that use GABA as a neurotransmitter"))
    self.assertEqual("Neuronal: GABAergic|Inhibitory neurons",
                     test_node[_aliases])
    self.assertEqual("Class", test_node[_rank])

    non_neural_id = "PCL:" + get_individual_id("CS202002013_219")
    self.assertIn(non_neural_id, output)
    test_node = output[non_neural_id]
    self.assertEqual("Non-neural", str(test_node[_label]))
    self.assertTrue(
        str(test_node[_description]).startswith(
            "Non-Neural is: Cells of mesoderm"))
    # This node is expected to carry no aliases.
    self.assertEqual("", test_node[_aliases])
    self.assertEqual("Class", test_node[_rank])

    # A node with several ranks has them joined with "|".
    multi_rank_id = "PCL:" + get_individual_id("CS202002013_112")
    self.assertEqual("Cell Type|Subclass", output[multi_rank_id][_rank])
def generate_app_specific_template(taxonomy_file_path, output_filepath):
    """Write a tab-separated ROBOT template of cell set colours.

    The taxonomy is loaded with the JSON dendrogram reader when the path ends
    in ".json" and with the nomenclature table reader otherwise. Every node
    that has a non-empty "cell_set_color" contributes one row holding its
    individual ID, the type 'owl:NamedIndividual' and the stripped colour.

    Args:
        taxonomy_file_path: Path of the taxonomy file to read.
        output_filepath: Path of the TSV file to write.
    """
    if str(taxonomy_file_path).endswith(".json"):
        load_taxonomy = dend_json_2_nodes_n_edges
    else:
        load_taxonomy = nomenclature_2_nodes_n_edges
    taxonomy = load_taxonomy(taxonomy_file_path)

    # The first record is the ROBOT header row that maps columns to properties.
    records = [{
        'ID': 'ID',
        'TYPE': 'TYPE',
        'cell_set_color': "A ALLENHELP:cell_set_color"
    }]
    for node in taxonomy['nodes']:
        if "cell_set_color" not in node or not node["cell_set_color"]:
            continue
        records.append({
            'ID': 'PCL:' + get_individual_id(node['cell_set_accession']),
            'TYPE': 'owl:NamedIndividual',
            'cell_set_color': str(node["cell_set_color"]).strip(),
        })

    pd.DataFrame.from_records(records).to_csv(
        output_filepath, sep="\t", index=False)
def test_human_mtg_ids(self):
    # (accession, expected ID) pairs, checked in the listed order.
    class_cases = (
        ("CS1908210001", "0023001"),
        ("CS1908210148", "0023148"),
    )
    individual_cases = (
        ("CS1908210001", "0023501"),
        ("CS1908210148", "0023648"),
    )
    for accession, expected in class_cases:
        self.assertEqual(get_class_id(accession), expected)
    for accession, expected in individual_cases:
        self.assertEqual(get_individual_id(accession), expected)
def test_marmoset_ids(self):
    # (accession, expected ID) pairs, checked in the listed order.
    class_cases = (
        ("CS201912132_1", "0019001"),
        ("CS201912132_121", "0019121"),
    )
    individual_cases = (
        ("CS201912132_1", "0019501"),
        ("CS201912132_121", "0019621"),
    )
    for accession, expected in class_cases:
        self.assertEqual(get_class_id(accession), expected)
    for accession, expected in individual_cases:
        self.assertEqual(get_individual_id(accession), expected)
def test_human_ids(self):
    # (accession, expected ID) pairs, checked in the listed order.
    class_cases = (
        ("CS201912131_1", "0015001"),
        ("CS201912131_121", "0015121"),
    )
    individual_cases = (
        ("CS201912131_1", "0015501"),
        ("CS201912131_121", "0015621"),
    )
    for accession, expected in class_cases:
        self.assertEqual(get_class_id(accession), expected)
    for accession, expected in individual_cases:
        self.assertEqual(get_individual_id(accession), expected)
def test_mouse_ids(self):
    # (accession, expected ID) pairs, checked in the listed order.
    class_cases = (
        ("CS202002013_1", "0011001"),
        ("CS202002013_121", "0011121"),
    )
    individual_cases = (
        ("CS202002013_1", "0011501"),
        ("CS202002013_121", "0011621"),
    )
    for accession, expected in class_cases:
        self.assertEqual(get_class_id(accession), expected)
    for accession, expected in individual_cases:
        self.assertEqual(get_individual_id(accession), expected)
def generate_ind_template(taxonomy_file_path, output_filepath):
    """Write a tab-separated ROBOT template with one individual per node.

    The taxonomy is loaded with the JSON dendrogram reader when the path ends
    in ".json" and with the nomenclature table reader otherwise. The taxon
    name is the file name up to its first "."; for nomenclature tables the
    "nomenclature_table_" prefix is removed as well. The first output row is
    the ROBOT header mapping each column to its annotation or relation.

    Args:
        taxonomy_file_path: Path of the taxonomy file (str or path-like).
        output_filepath: Path of the TSV file to write.

    Raises:
        KeyError: If a node lacks 'cell_set_accession' or 'cell_set_label',
            or the taxonomy configuration lacks 'Species_abbv'.
    """
    # str() keeps path-like inputs working, matching the suffix check below.
    file_name = os.path.basename(str(taxonomy_file_path))
    taxon = file_name.split(".")[0]
    if str(taxonomy_file_path).endswith(".json"):
        dend = dend_json_2_nodes_n_edges(taxonomy_file_path)
    else:
        dend = nomenclature_2_nodes_n_edges(taxonomy_file_path)
        taxon = taxon.replace("nomenclature_table_", "")

    dend_tree = generate_dendrogram_tree(dend)
    taxonomy_config = read_taxonomy_config(taxon)
    allen_descriptions = read_allen_descriptions(
        ALLEN_DESCRIPTIONS_PATH, taxonomy_config['Species_abbv'][0])
    subtrees = get_subtrees(dend_tree, taxonomy_config)

    robot_template_seed = {
        'ID': 'ID',
        'Label': 'LABEL',
        'PrefLabel': 'A skos:prefLabel',
        'Entity Type': 'TI %',
        'TYPE': 'TYPE',
        'Property Assertions': "I 'subcluster of' SPLIT=|",
        'Synonyms': 'A oboInOwl:hasExactSynonym SPLIT=|',
        'Cluster_ID': "A 'cluster id'",
        'Function': 'TI capable_of some %',
        'cell_set_preferred_alias': "A n2o:cell_set_preferred_alias",
        'original_label': "A n2o:original_label",
        'cell_set_label': "A n2o:cell_set_label",
        'cell_set_aligned_alias': "A n2o:cell_set_aligned_alias",
        'cell_set_additional_aliases':
            "A n2o:cell_set_additional_aliases SPLIT=|",
        'cell_set_alias_assignee': "A n2o:cell_set_alias_assignee SPLIT=|",
        'cell_set_alias_citation': "A n2o:cell_set_alias_citation SPLIT=|",
        'Metadata': "A n2o:node_metadata",
        'Exemplar_of': "TI 'exemplar data of' some %",
        'Comment': "A rdfs:comment",
        'Aliases': "A oboInOwl:hasRelatedSynonym SPLIT=|",
        'Rank': "A 'cell_type_rank' SPLIT=|"
    }
    synonym_properties = [
        'cell_set_aligned_alias', 'cell_set_additional_aliases'
    ]
    meta_properties = [
        'cell_set_preferred_alias', 'original_label', 'cell_set_label',
        'cell_set_aligned_alias', 'cell_set_additional_aliases',
        'cell_set_alias_assignee', 'cell_set_alias_citation'
    ]

    # Loop invariants: build the subtree membership set and the edge index
    # once instead of rebuilding / rescanning them for every node.
    subtree_members = set().union(*subtrees)
    targets_by_source = {}
    for edge in dend['edges']:
        targets_by_source.setdefault(edge[0], []).append(edge[1])

    dl = [robot_template_seed]
    for o in dend['nodes']:
        accession = o['cell_set_accession']
        # .get(): the preferred alias is optional on a node.
        preferred_alias = o.get('cell_set_preferred_alias')

        d = {
            'ID': 'PCL:' + get_individual_id(accession),
            'TYPE': 'owl:NamedIndividual',
            'Label': o['cell_set_label'] + ' - ' + accession,
            # Fall back to the accession when no preferred alias is given.
            'PrefLabel': preferred_alias or accession,
            'Entity Type': 'PCL:0010001',  # Cluster
            'Metadata': json.dumps(o),
            'Synonyms': '|'.join(
                o[prop] for prop in synonym_properties if o.get(prop)),
            'Property Assertions': '|'.join(sorted(
                'PCL:' + get_individual_id(target)
                for target in targets_by_source.get(accession, []))),
        }

        for prop in meta_properties:
            if prop in o:
                # Filter on the stripped value so whitespace-only segments
                # do not survive as empty "|"-separated entries.
                d[prop] = '|'.join(
                    part.strip()
                    for part in str(o[prop]).split("|")
                    if part.strip())
            else:
                d[prop] = ''

        d['Cluster_ID'] = accession
        if accession in subtree_members and preferred_alias:
            d['Exemplar_of'] = PCL_BASE + get_class_id(accession)

        if "cell_type_card" in o:
            ranks = [
                rank.strip() for rank in str(o["cell_type_card"]).split(",")
            ]
            # Only the exact value "No" becomes "None"; a substring replace
            # would corrupt values that merely contain "No".
            d['Rank'] = '|'.join(
                "None" if rank == "No" else rank for rank in ranks)

        if accession in allen_descriptions:
            allen_data = allen_descriptions[accession]
            d['Comment'] = allen_data["summary"][0]
            if allen_data["aliases"][0]:
                d['Aliases'] = '|'.join(
                    alias.strip()
                    for alias in str(allen_data["aliases"][0]).split("|"))
        # There should only be one!
        dl.append(d)

    robot_template = pd.DataFrame.from_records(dl)
    robot_template.to_csv(output_filepath, sep="\t", index=False)
def generate_base_class_template(taxonomy_file_path, output_filepath):
    """Write a tab-separated class template for the configured subtrees.

    Nothing is written when no taxonomy configuration is found for the taxon
    derived from the path. Otherwise one row is produced for every node that
    belongs to one of the configured subtrees and has a preferred alias or
    additional aliases. Every row carries all the `class_seed` columns;
    those with no value are filled with ''.

    Args:
        taxonomy_file_path: Path of the taxonomy file; a ".json" suffix
            selects the JSON dendrogram reader, anything else the
            nomenclature table reader.
        output_filepath: Path of the TSV file to write.
    """
    taxon = extract_taxonomy_name_from_path(taxonomy_file_path)
    taxonomy_config = read_taxonomy_config(taxon)
    if not taxonomy_config:
        return

    if str(taxonomy_file_path).endswith(".json"):
        dend = dend_json_2_nodes_n_edges(taxonomy_file_path)
    else:
        dend = nomenclature_2_nodes_n_edges(taxonomy_file_path)
    dend_tree = generate_dendrogram_tree(dend)
    subtrees = get_subtrees(dend_tree, taxonomy_config)

    if "Reference_gene_list" in taxonomy_config:
        gene_db_path = ENSEMBLE_PATH.format(
            str(taxonomy_config["Reference_gene_list"][0]).strip().lower())
        gene_names = read_gene_data(gene_db_path)
        # Both marker paths are formatted with the taxon name stripped of
        # its "CCN" / "CS" prefix.
        marker_file_id = taxon.replace("CCN", "").replace("CS", "")
        minimal_markers = read_markers(
            MARKER_PATH.format(marker_file_id), gene_names)
        allen_markers = read_markers(
            ALLEN_MARKER_PATH.format(marker_file_id), gene_names)
    else:
        minimal_markers = {}
        allen_markers = {}

    class_seed = [
        'defined_class', 'prefLabel', 'Alias_citations',
        'Synonyms_from_taxonomy', 'Gross_cell_type', 'Taxon', 'Brain_region',
        'Minimal_markers', 'Allen_markers', 'Individual',
        'Brain_region_abbv', 'Species_abbv', 'Cluster_ID', 'part_of',
        'has_soma_location', 'aligned_alias', 'marker_gene_set'
    ]

    # Built once, and with set().union so that an empty `subtrees` yields an
    # empty set instead of the TypeError raised by set.union() without
    # arguments.
    subtree_members = set().union(*subtrees)

    class_template = []
    for o in dend['nodes']:
        accession = o['cell_set_accession']
        if accession not in subtree_members:
            continue
        if not (o['cell_set_preferred_alias']
                or o['cell_set_additional_aliases']):
            continue

        # NOTE: keys are inserted in a fixed order on purpose; it determines
        # the column order of the written file.
        d = {'defined_class': PCL_BASE + get_class_id(accession)}
        if o['cell_set_preferred_alias']:
            d['prefLabel'] = o['cell_set_preferred_alias']
        elif o['cell_set_additional_aliases']:
            # Without a preferred alias, use the first additional alias.
            d['prefLabel'] = str(o['cell_set_additional_aliases']).split(
                EXPRESSION_SEPARATOR)[0]
        d['Synonyms_from_taxonomy'] = get_synonyms_from_taxonomy(o)
        d['Gross_cell_type'] = get_gross_cell_type(
            accession, subtrees, taxonomy_config)
        d['Taxon'] = taxonomy_config['Species'][0]
        d['Brain_region'] = taxonomy_config['Brain_region'][0]
        d['Cluster_ID'] = accession

        if 'cell_set_alias_citation' in o and o['cell_set_alias_citation']:
            alias_citations = [
                citation.strip()
                for citation in str(o["cell_set_alias_citation"]).split("|")
                if citation and citation.strip()
            ]
            d["Alias_citations"] = "|".join(alias_citations)

        if accession in minimal_markers:
            d['Minimal_markers'] = minimal_markers[accession]
        if accession in allen_markers:
            d['Allen_markers'] = allen_markers[accession]
        else:
            d['Allen_markers'] = ''

        if 'Brain_region_abbv' in taxonomy_config:
            d['Brain_region_abbv'] = taxonomy_config['Brain_region_abbv'][0]
        if 'Species_abbv' in taxonomy_config:
            d['Species_abbv'] = taxonomy_config['Species_abbv'][0]
        d['Individual'] = PCL_BASE + get_individual_id(accession)

        # The location relation of the subtree's root node decides which of
        # the two location columns receives the brain region.
        for index, subtree in enumerate(subtrees):
            if accession in subtree:
                location_rel = taxonomy_config['Root_nodes'][index][
                    'Location_relation']
                if location_rel == "part_of":
                    d['part_of'] = taxonomy_config['Brain_region'][0]
                    d['has_soma_location'] = ''
                elif location_rel == "has_soma_location":
                    d['part_of'] = ''
                    d['has_soma_location'] = taxonomy_config[
                        'Brain_region'][0]

        if "cell_set_aligned_alias" in o and o["cell_set_aligned_alias"]:
            d['aligned_alias'] = o["cell_set_aligned_alias"]
        if accession in minimal_markers:
            d['marker_gene_set'] = PCL_PREFIX + get_marker_gene_set_id(
                accession)

        # Make sure every seed column is present in the row.
        for k in class_seed:
            d.setdefault(k, '')
        class_template.append(d)

    class_robot_template = pd.DataFrame.from_records(class_template)
    class_robot_template.to_csv(output_filepath, sep="\t", index=False)