def send_to_output(self, result):
    """Build the output table from a finished download and emit it.

    ``result`` is unpacked as a ``(etc_json, table_name)`` pair. The JSON
    payload is converted with ``etc_to_table``, the table is named, its
    genes are matched with a ``GeneMatcher`` created for ``self.organism``,
    and the table attributes describing the gene annotation are filled in
    before the table is sent on ``self.Outputs.etc_data``.
    """
    # The work is done: clear the progress UI before touching the result.
    self.progress_bar.finish()
    self.setStatusMessage('')

    etc_json, table_name = result
    genes_as_attributes = bool(self.gene_as_attr_name)

    # Convert the payload to a table and give it its name.
    table = etc_to_table(etc_json, genes_as_attributes)
    table.name = table_name

    # Match genes, either on attribute names or on the 'Gene' column.
    tax_id = str(self.organism)
    matcher = GeneMatcher(tax_id)
    if genes_as_attributes:
        matcher.match_table_attributes(table)
        table.attributes[GENE_ID_ATTRIBUTE] = ENTREZ_ID
    else:
        if 'Gene' in table.domain:
            table = matcher.match_table_column(
                table, 'Gene', StringVariable(ENTREZ_ID))
        # NOTE(review): block nesting was reconstructed from a
        # whitespace-collapsed source; confirm this annotation is meant to
        # be set even when the table has no 'Gene' column.
        table.attributes[GENE_ID_COLUMN] = ENTREZ_ID

    # Record the organism and the table orientation on the table itself.
    table.attributes[TAX_ID] = tax_id
    table.attributes[GENE_AS_ATTRIBUTE_NAME] = genes_as_attributes

    # Reset cache indicators, then hand the table to the output signal.
    self.set_cached_indicator()
    self.Outputs.etc_data.send(table)
def test_match_table_column(self):
    """Check that ``match_table_column`` yields a table containing ENTREZ_ID.

    The 'gene' column of ``brown-selected.tab`` is matched with a
    ``GeneMatcher`` built for '4932' (presumably the taxonomy id of the
    organism in that dataset; verify against the data file).
    """
    matcher = GeneMatcher('4932')
    source = Table('brown-selected.tab')
    matched = matcher.match_table_column(source, 'gene')
    # assertIn reports both operands on failure, unlike assertTrue(x in y).
    self.assertIn(ENTREZ_ID, matched.domain)
def panglao_db(file_path: str) -> None:
    """Convert a gzipped PanglaoDB marker file into a marker-gene table.

    The file at ``file_path`` is read as gzip-compressed, UTF-8 encoded,
    tab-separated text. For every row, each space-separated token in the
    first column that is a known species code ('Mm' or 'Hs') produces one
    entry made of the organism name, the gene symbol (second column), the
    cell type (third column) and the PanglaoDB reference name and URL.
    Tokens that are not known species codes are ignored.

    One table per organism is built, its 'Name' column is matched with a
    ``GeneMatcher`` for that organism, and a 'Function' column holding the
    gene descriptions is concatenated to it.

    Nothing is returned: the mouse and human tables are stacked and saved
    to ``data/marker_genes/panglao_gene_markers.tab``, relative to the
    current working directory.

    Args:
        file_path: Path of the gzip-compressed PanglaoDB marker file.

    Raises:
        IndexError: If a row names a known species but has fewer than
            three tab-separated columns.
    """
    file_name = 'panglao_gene_markers.tab'
    reference, reference_url = 'PanglaoDB', 'https://panglaodb.se/'

    with gzip.open(file_path, 'rb') as f:
        content = f.read().decode('utf-8').strip()

    # Zero-based positions of the columns used from each row.
    species = 0
    gene_symbol = 1
    cell_type = 2

    genes_by_organism = defaultdict(list)
    organism_mapper = {'Mm': 'Mouse', 'Hs': 'Human'}

    def _gene_function_table(desc_col: StringVariable, gm_results: GeneMatcher):
        """Build a one-column meta table of gene descriptions ('' if unset)."""
        _domain = Domain([], metas=[desc_col])
        _data = [[str(gene.description) if gene.description else '']
                 for gene in gm_results.genes]
        return Table(_domain, _data)

    def _organism_table(tax_id: str, organism: str):
        """Build the matched and described marker table for one organism."""
        matcher = GeneMatcher(tax_id)
        table = Table(domain, genes_by_organism[organism])
        table = matcher.match_table_column(table, 'Name', entrez_id_column)
        # No axis is passed here; presumably the default joins the
        # description column side by side -- TODO confirm against the
        # Orange version in use.
        return Table.concatenate(
            [table, _gene_function_table(description_column, matcher)])

    for line in content.split('\n'):
        columns = line.split('\t')
        # One row may list several species; emit one entry per known one.
        for org in columns[species].split(' '):
            organism = organism_mapper.get(org)
            if organism is None:
                continue
            genes_by_organism[organism].append([
                organism,
                columns[gene_symbol],
                columns[cell_type],
                reference,
                reference_url,
            ])

    domain = Domain(
        [],
        metas=[
            StringVariable('Organism'),
            StringVariable('Name'),
            StringVariable('Cell Type'),
            StringVariable('Reference'),
            StringVariable('URL'),
        ],
    )

    # Both organism tables share these variables so they can be stacked.
    entrez_id_column = StringVariable('Entrez ID')
    description_column = StringVariable('Function')

    mouse_table = _organism_table('10090', 'Mouse')
    human_table = _organism_table('9606', 'Human')

    # Stack mouse rows on top of human rows and write the result to disk.
    Table.concatenate([mouse_table, human_table], axis=0).save(f'data/marker_genes/{file_name}')