Ejemplo n.º 1
0
    def send_to_output(self, result):
        """Convert a finished result into a table and send it to the output.

        ``result`` is unpacked as ``(etc_json, table_name)``. The JSON is
        converted with ``etc_to_table``, genes are matched with a
        ``GeneMatcher`` built from ``self.organism``, and the table
        attributes are filled in before the table is sent on
        ``self.Outputs.etc_data``.
        """
        # the work is done: clear the progress bar and the status line
        self.progress_bar.finish()
        self.setStatusMessage('')

        payload, name = result
        genes_as_attributes = bool(self.gene_as_attr_name)

        # build the table and label it
        table = etc_to_table(payload, genes_as_attributes)
        table.name = name

        # gene matching: either on attribute names or on the 'Gene' column
        matcher = GeneMatcher(str(self.organism))
        if genes_as_attributes:
            matcher.match_table_attributes(table)
            table.attributes[GENE_ID_ATTRIBUTE] = ENTREZ_ID
        else:
            if 'Gene' in table.domain:
                entrez_variable = StringVariable(ENTREZ_ID)
                table = matcher.match_table_column(
                    table, 'Gene', entrez_variable)
            # NOTE(review): recorded even when there is no 'Gene' column
            table.attributes[GENE_ID_COLUMN] = ENTREZ_ID

        # table-level annotations
        table.attributes[TAX_ID] = str(self.organism)
        table.attributes[GENE_AS_ATTRIBUTE_NAME] = genes_as_attributes

        # refresh the cache indicators, then emit the table
        self.set_cached_indicator()
        self.Outputs.etc_data.send(table)
    def test_match_table_column(self):
        """Matching the 'gene' column yields a table whose domain has ENTREZ_ID."""
        gm = GeneMatcher('4932')

        data = gm.match_table_column(Table('brown-selected.tab'), 'gene')
        # assertIn reports the member and the container on failure,
        # whereas assertTrue(x in y) only reports "False is not true"
        self.assertIn(ENTREZ_ID, data.domain)
Ejemplo n.º 3
0
def panglao_db(file_path: str) -> None:
    """Build the PanglaoDB marker-gene table and save it as a .tab file.

    Reads the gzip-compressed, UTF-8, tab-separated file at ``file_path``.
    The first three columns of each line are used as species codes
    (space-separated), gene symbol and cell type. Lines are grouped by
    organism ('Mm' -> 'Mouse', 'Hs' -> 'Human'); other species codes
    (and therefore any line without a known code) are skipped.

    For each organism the genes in the 'Name' column are matched with a
    ``GeneMatcher``, an 'Entrez ID' column is requested from the matcher and
    a 'Function' column with the gene descriptions is appended. The mouse
    and human tables are stacked and written to
    ``data/marker_genes/panglao_gene_markers.tab``.

    :param file_path: path to the gzip-compressed PanglaoDB dump.
    """
    file_name = 'panglao_gene_markers.tab'
    reference, reference_url = 'PanglaoDB', 'https://panglaodb.se/'

    with gzip.open(file_path, 'rb') as f:
        content = f.read().decode('utf-8').strip()

    # positions of the columns used from the tab-separated input
    species = 0
    gene_symbol = 1
    cell_type = 2
    genes_by_organism = defaultdict(list)
    organism_mapper = {'Mm': 'Mouse', 'Hs': 'Human'}

    def _gene_function_table(desc_col: StringVariable,
                             gm_results: GeneMatcher):
        # one-column (meta) table holding the description of every matched gene
        _domain = Domain([], metas=[desc_col])
        _data = [[str(gene.description) if gene.description else '']
                 for gene in gm_results.genes]
        return Table(_domain, _data)

    def _organism_table(organism: str, tax_id: str):
        # match the genes of one organism and append the 'Function' column
        matcher = GeneMatcher(tax_id)
        table = Table(domain, genes_by_organism[organism])
        table = matcher.match_table_column(table, 'Name', entrez_id_column)
        # axis=1 is spelled out: this is a column-wise join, and the default
        # axis of Table.concatenate is not the same in all Orange releases
        return Table.concatenate(
            [table, _gene_function_table(description_column, matcher)],
            axis=1)

    for line in content.split('\n'):
        columns = line.split('\t')

        # a gene may be listed for several species at once, e.g. 'Mm Hs'
        for org in columns[species].split(' '):
            if org in organism_mapper:
                gene_entry = [
                    organism_mapper[org], columns[gene_symbol],
                    columns[cell_type], reference, reference_url
                ]
                genes_by_organism[organism_mapper[org]].append(gene_entry)

    domain = Domain(
        [],
        metas=[
            StringVariable('Organism'),
            StringVariable('Name'),
            StringVariable('Cell Type'),
            StringVariable('Reference'),
            StringVariable('URL'),
        ],
    )

    entrez_id_column = StringVariable('Entrez ID')
    description_column = StringVariable('Function')

    # presumably NCBI taxonomy ids (10090 mouse, 9606 human) - confirm
    mouse_table = _organism_table('Mouse', '10090')
    human_table = _organism_table('Human', '9606')

    # stack both organisms row-wise and write the combined table
    Table.concatenate([mouse_table, human_table],
                      axis=0).save(f'data/marker_genes/{file_name}')