Esempio n. 1
0
    def test_generate_ind_template(self):
        """Generate the individuals template and spot-check selected rows.

        Runs ``generate_ind_template`` on the mouse nomenclature table, reads
        the written TSV back with ``read_tsv`` and verifies the label,
        description, aliases and rank values of a few known cell sets.
        """
        generate_ind_template(PATH_MOUSE_NOMENCLATURE, PATH_GENERIC_OUTPUT_TSV)
        output = read_tsv(PATH_GENERIC_OUTPUT_TSV)

        # Positions of the inspected values within a row returned by read_tsv.
        _label = 2
        _description = 18
        _aliases = 19
        _rank = 20

        gabaergic_id = "PCL:" + get_individual_id("CS202002013_123")
        self.assertIn(gabaergic_id, output)  # child
        test_node = output[gabaergic_id]
        self.assertEqual("GABAergic", str(test_node[_label]))
        self.assertTrue(
            str(test_node[_description]).startswith(
                "GABAergic is: Neurons that use GABA as a neurotransmitter"))
        self.assertEqual("Neuronal: GABAergic|Inhibitory neurons",
                         test_node[_aliases])
        self.assertEqual("Class", test_node[_rank])

        non_neural_id = "PCL:" + get_individual_id("CS202002013_219")
        self.assertIn(non_neural_id, output)  # child
        test_node = output[non_neural_id]
        self.assertEqual("Non-neural", str(test_node[_label]))
        self.assertTrue(
            str(test_node[_description]).startswith(
                "Non-Neural is: Cells of mesoderm"))
        self.assertEqual("", test_node[_aliases])
        self.assertEqual("Class", test_node[_rank])

        # A node may carry several ranks, joined with "|".
        self.assertEqual(
            "Cell Type|Subclass",
            output["PCL:" + get_individual_id("CS202002013_112")][_rank])
def generate_app_specific_template(taxonomy_file_path, output_filepath):
    """Write a template TSV with the colour annotation of each cell set.

    The taxonomy is loaded with ``dend_json_2_nodes_n_edges`` when the path
    ends in ``.json`` and with ``nomenclature_2_nodes_n_edges`` otherwise.
    Only nodes that carry a non-empty ``cell_set_color`` produce a row.

    Args:
        taxonomy_file_path: Path of the taxonomy file to read.
        output_filepath: Destination of the tab-separated template.
    """
    is_json = str(taxonomy_file_path).endswith(".json")
    load_taxonomy = (dend_json_2_nodes_n_edges
                     if is_json else nomenclature_2_nodes_n_edges)
    taxonomy = load_taxonomy(taxonomy_file_path)

    # First row: maps every column to its template directive.
    rows = [{
        'ID': 'ID',
        'TYPE': 'TYPE',
        'cell_set_color': "A ALLENHELP:cell_set_color"
    }]
    rows.extend(
        {
            'ID': 'PCL:' + get_individual_id(node['cell_set_accession']),
            'TYPE': 'owl:NamedIndividual',
            'cell_set_color': str(node["cell_set_color"]).strip(),
        }
        for node in taxonomy['nodes']
        if "cell_set_color" in node and node["cell_set_color"]
    )

    pd.DataFrame.from_records(rows).to_csv(
        output_filepath, sep="\t", index=False)
    def test_human_mtg_ids(self):
        """Check class and individual IDs for two ``CS190821`` accessions."""
        class_id_cases = (
            ("CS1908210001", "0023001"),
            ("CS1908210148", "0023148"),
        )
        individual_id_cases = (
            ("CS1908210001", "0023501"),
            ("CS1908210148", "0023648"),
        )

        for accession, expected in class_id_cases:
            self.assertEqual(get_class_id(accession), expected)

        for accession, expected in individual_id_cases:
            self.assertEqual(get_individual_id(accession), expected)
    def test_marmoset_ids(self):
        """Check class and individual IDs for two ``CS201912132`` accessions."""
        class_id_cases = (
            ("CS201912132_1", "0019001"),
            ("CS201912132_121", "0019121"),
        )
        individual_id_cases = (
            ("CS201912132_1", "0019501"),
            ("CS201912132_121", "0019621"),
        )

        for accession, expected in class_id_cases:
            self.assertEqual(get_class_id(accession), expected)

        for accession, expected in individual_id_cases:
            self.assertEqual(get_individual_id(accession), expected)
    def test_human_ids(self):
        """Check class and individual IDs for two ``CS201912131`` accessions."""
        class_id_cases = (
            ("CS201912131_1", "0015001"),
            ("CS201912131_121", "0015121"),
        )
        individual_id_cases = (
            ("CS201912131_1", "0015501"),
            ("CS201912131_121", "0015621"),
        )

        for accession, expected in class_id_cases:
            self.assertEqual(get_class_id(accession), expected)

        for accession, expected in individual_id_cases:
            self.assertEqual(get_individual_id(accession), expected)
    def test_mouse_ids(self):
        """Check class and individual IDs for two ``CS202002013`` accessions."""
        class_id_cases = (
            ("CS202002013_1", "0011001"),
            ("CS202002013_121", "0011121"),
        )
        individual_id_cases = (
            ("CS202002013_1", "0011501"),
            ("CS202002013_121", "0011621"),
        )

        for accession, expected in class_id_cases:
            self.assertEqual(get_class_id(accession), expected)

        for accession, expected in individual_id_cases:
            self.assertEqual(get_individual_id(accession), expected)
def generate_ind_template(taxonomy_file_path, output_filepath):
    """Generate the individuals template of a taxonomy and write it as TSV.

    The output starts with a seed row that maps every column to its template
    directive, followed by one row per node of the taxonomy.

    Args:
        taxonomy_file_path: Path of the taxonomy. Files ending in ``.json``
            are loaded with ``dend_json_2_nodes_n_edges``, everything else
            with ``nomenclature_2_nodes_n_edges``. The taxon name is the file
            name up to its first ``.`` (minus the ``nomenclature_table_``
            prefix for non-JSON inputs).
        output_filepath: Destination of the tab-separated template.
    """
    # Normalise once so both str and path-like arguments are accepted.
    taxonomy_path = str(taxonomy_file_path)
    taxon = taxonomy_path.split(os.path.sep)[-1].split(".")[0]

    if taxonomy_path.endswith(".json"):
        dend = dend_json_2_nodes_n_edges(taxonomy_file_path)
    else:
        dend = nomenclature_2_nodes_n_edges(taxonomy_file_path)
        taxon = taxon.replace("nomenclature_table_", "")

    dend_tree = generate_dendrogram_tree(dend)
    taxonomy_config = read_taxonomy_config(taxon)
    allen_descriptions = read_allen_descriptions(
        ALLEN_DESCRIPTIONS_PATH, taxonomy_config['Species_abbv'][0])

    subtrees = get_subtrees(dend_tree, taxonomy_config)
    # Loop invariant: every accession that belongs to any configured subtree.
    subtree_members = set().union(*subtrees)

    robot_template_seed = {
        'ID': 'ID',
        'Label': 'LABEL',
        'PrefLabel': 'A skos:prefLabel',
        'Entity Type': 'TI %',
        'TYPE': 'TYPE',
        'Property Assertions': "I 'subcluster of' SPLIT=|",
        'Synonyms': 'A oboInOwl:hasExactSynonym SPLIT=|',
        'Cluster_ID': "A 'cluster id'",
        'Function': 'TI capable_of some %',
        'cell_set_preferred_alias': "A n2o:cell_set_preferred_alias",
        'original_label': "A n2o:original_label",
        'cell_set_label': "A n2o:cell_set_label",
        'cell_set_aligned_alias': "A n2o:cell_set_aligned_alias",
        'cell_set_additional_aliases':
        "A n2o:cell_set_additional_aliases SPLIT=|",
        'cell_set_alias_assignee': "A n2o:cell_set_alias_assignee SPLIT=|",
        'cell_set_alias_citation': "A n2o:cell_set_alias_citation SPLIT=|",
        'Metadata': "A n2o:node_metadata",
        'Exemplar_of': "TI 'exemplar data of' some %",
        'Comment': "A rdfs:comment",
        'Aliases': "A oboInOwl:hasRelatedSynonym SPLIT=|",
        'Rank': "A 'cell_type_rank' SPLIT=|"
    }
    dl = [robot_template_seed]

    synonym_properties = [
        'cell_set_aligned_alias', 'cell_set_additional_aliases'
    ]
    meta_properties = [
        'cell_set_preferred_alias', 'original_label', 'cell_set_label',
        'cell_set_aligned_alias', 'cell_set_additional_aliases',
        'cell_set_alias_assignee', 'cell_set_alias_citation'
    ]

    # Index the edges once by their first element instead of rescanning the
    # whole edge list for every node.
    edge_targets = {}
    for edge in dend['edges']:
        edge_targets.setdefault(edge[0], []).append(edge[1])

    for o in dend['nodes']:
        accession = o['cell_set_accession']
        d = dict()
        d['ID'] = 'PCL:' + get_individual_id(accession)
        d['TYPE'] = 'owl:NamedIndividual'
        d['Label'] = o['cell_set_label'] + ' - ' + accession
        # Fall back to the accession when no preferred alias is available.
        d['PrefLabel'] = o.get('cell_set_preferred_alias') or accession
        d['Entity Type'] = 'PCL:0010001'  # Cluster
        d['Metadata'] = json.dumps(o)
        d['Synonyms'] = '|'.join(
            o[prop] for prop in synonym_properties if o.get(prop))
        d['Property Assertions'] = '|'.join(
            sorted('PCL:' + get_individual_id(target)
                   for target in edge_targets.get(accession, [])))
        for prop in meta_properties:
            if prop in o:
                # Re-join "|"-separated values with surrounding blanks removed.
                d[prop] = '|'.join(
                    prop_val.strip() for prop_val in str(o[prop]).split("|")
                    if prop_val)
            else:
                d[prop] = ''
        d['Cluster_ID'] = accession
        # .get(): the preferred alias is optional (see PrefLabel above), so a
        # missing key must not raise here.
        if accession in subtree_members and o.get('cell_set_preferred_alias'):
            d['Exemplar_of'] = PCL_BASE + get_class_id(accession)

        if "cell_type_card" in o:
            # NOTE(review): this is a substring replacement, so any value that
            # merely contains "No" (e.g. "None") is rewritten as well - confirm
            # that only the bare value "No" can occur here.
            d['Rank'] = '|'.join(
                cell_type.strip().replace("No", "None")
                for cell_type in str(o["cell_type_card"]).split(","))

        if accession in allen_descriptions:
            allen_data = allen_descriptions[accession]
            d['Comment'] = allen_data["summary"][0]
            if allen_data["aliases"][0]:
                d['Aliases'] = '|'.join(
                    alias.strip()
                    for alias in str(allen_data["aliases"][0]).split("|"))

        dl.append(d)

    robot_template = pd.DataFrame.from_records(dl)
    robot_template.to_csv(output_filepath, sep="\t", index=False)
def generate_base_class_template(taxonomy_file_path, output_filepath):
    """Generate the base class template of a taxonomy and write it as TSV.

    A row is produced for every node that belongs to one of the configured
    subtrees and has a preferred alias or additional aliases. Nothing is
    written when ``read_taxonomy_config`` returns a falsy value for the taxon.

    Args:
        taxonomy_file_path: Path of the taxonomy. Files ending in ``.json``
            are loaded with ``dend_json_2_nodes_n_edges``, everything else
            with ``nomenclature_2_nodes_n_edges``.
        output_filepath: Destination of the tab-separated template.
    """
    taxon = extract_taxonomy_name_from_path(taxonomy_file_path)
    taxonomy_config = read_taxonomy_config(taxon)

    if not taxonomy_config:
        return

    if str(taxonomy_file_path).endswith(".json"):
        dend = dend_json_2_nodes_n_edges(taxonomy_file_path)
    else:
        dend = nomenclature_2_nodes_n_edges(taxonomy_file_path)
    dend_tree = generate_dendrogram_tree(dend)
    subtrees = get_subtrees(dend_tree, taxonomy_config)
    # Loop invariant. set().union(...) also copes with an empty subtree list,
    # where set.union(*subtrees) would raise a TypeError.
    subtree_members = set().union(*subtrees)

    if "Reference_gene_list" in taxonomy_config:
        gene_db_path = ENSEMBLE_PATH.format(
            str(taxonomy_config["Reference_gene_list"][0]).strip().lower())
        gene_names = read_gene_data(gene_db_path)
        # Marker file names use the taxon without its "CCN" / "CS" parts.
        marker_file_key = taxon.replace("CCN", "").replace("CS", "")
        minimal_markers = read_markers(
            MARKER_PATH.format(marker_file_key), gene_names)
        allen_markers = read_markers(
            ALLEN_MARKER_PATH.format(marker_file_key), gene_names)
    else:
        minimal_markers = {}
        allen_markers = {}

    class_seed = [
        'defined_class', 'prefLabel', 'Alias_citations',
        'Synonyms_from_taxonomy', 'Gross_cell_type', 'Taxon',
        'Brain_region', 'Minimal_markers', 'Allen_markers', 'Individual',
        'Brain_region_abbv', 'Species_abbv', 'Cluster_ID', 'part_of',
        'has_soma_location', 'aligned_alias', 'marker_gene_set'
    ]
    class_template = []

    for o in dend['nodes']:
        accession = o['cell_set_accession']
        # .get(): a node without these keys is skipped instead of raising.
        preferred_alias = o.get('cell_set_preferred_alias')
        additional_aliases = o.get('cell_set_additional_aliases')
        if accession not in subtree_members or not (preferred_alias
                                                    or additional_aliases):
            continue

        # NOTE: keys are inserted in the same order as before; the column
        # order of the written TSV follows the insertion order of the records.
        d = dict()
        d['defined_class'] = PCL_BASE + get_class_id(accession)
        if preferred_alias:
            d['prefLabel'] = preferred_alias
        else:
            # No preferred alias: use the first additional alias instead.
            d['prefLabel'] = str(additional_aliases).split(
                EXPRESSION_SEPARATOR)[0]
        d['Synonyms_from_taxonomy'] = get_synonyms_from_taxonomy(o)
        d['Gross_cell_type'] = get_gross_cell_type(accession, subtrees,
                                                   taxonomy_config)
        d['Taxon'] = taxonomy_config['Species'][0]
        d['Brain_region'] = taxonomy_config['Brain_region'][0]
        d['Cluster_ID'] = accession
        if o.get('cell_set_alias_citation'):
            # Drop empty / blank entries and trim the remaining citations.
            d["Alias_citations"] = "|".join(
                citation.strip()
                for citation in str(o["cell_set_alias_citation"]).split("|")
                if citation and citation.strip())
        if accession in minimal_markers:
            d['Minimal_markers'] = minimal_markers[accession]
        d['Allen_markers'] = allen_markers.get(accession, '')
        if 'Brain_region_abbv' in taxonomy_config:
            d['Brain_region_abbv'] = taxonomy_config['Brain_region_abbv'][0]
        if 'Species_abbv' in taxonomy_config:
            d['Species_abbv'] = taxonomy_config['Species_abbv'][0]
        d['Individual'] = PCL_BASE + get_individual_id(accession)

        # The location relation comes from the root-node configuration of the
        # subtree(s) containing the node; a later matching subtree overrides
        # an earlier one.
        for index, subtree in enumerate(subtrees):
            if accession in subtree:
                location_rel = taxonomy_config['Root_nodes'][index][
                    'Location_relation']
                if location_rel == "part_of":
                    d['part_of'] = taxonomy_config['Brain_region'][0]
                    d['has_soma_location'] = ''
                elif location_rel == "has_soma_location":
                    d['part_of'] = ''
                    d['has_soma_location'] = taxonomy_config[
                        'Brain_region'][0]

        if o.get("cell_set_aligned_alias"):
            d['aligned_alias'] = o["cell_set_aligned_alias"]
        if accession in minimal_markers:
            d['marker_gene_set'] = PCL_PREFIX + get_marker_gene_set_id(
                accession)

        # Make sure every template column exists in the record.
        for k in class_seed:
            d.setdefault(k, '')
        class_template.append(d)

    class_robot_template = pd.DataFrame.from_records(class_template)
    class_robot_template.to_csv(output_filepath, sep="\t", index=False)