def test_case_insensitive(self):
    """Compare default and case-insensitive matching of one gene symbol.

    Five spellings of the same symbol (differing only in letter case) are
    fed to the matcher twice.  With the default settings exactly one of
    them is expected to resolve to an NCBI id; with
    ``case_insensitive=True`` all five are expected to resolve, and an id
    taken from the set of resolved ids must equal the expected one.
    """
    organism = '10090'
    original_name = 'Pou4f1'
    ncbi_id = 18996
    spellings = ['Pou4F1', 'pou4F1', 'POU4F1', 'pou4f1', original_name]

    # Default matcher: only one spelling should end up with an id.
    strict_matcher = gene.GeneMatcher(organism)
    strict_matcher.genes = list(spellings)
    strict_matcher.run_matcher()

    strict_ids = [g.ncbi_id for g in strict_matcher.genes if g.ncbi_id]
    self.assertEqual(len(strict_ids), 1)

    # Case-insensitive matcher: every spelling should end up with an id.
    relaxed_matcher = gene.GeneMatcher(organism, case_insensitive=True)
    relaxed_matcher.genes = list(spellings)
    relaxed_matcher.run_matcher()

    relaxed_ids = [g.ncbi_id for g in relaxed_matcher.genes if g.ncbi_id]
    self.assertEqual(len(relaxed_ids), 5)
    self.assertEqual(set(relaxed_ids).pop(), ncbi_id)
def ncbi_info(taxid, genes, advance=None):
    """Collect NCBI annotation rows for ``genes`` of the organism ``taxid``.

    Args:
        taxid: Organism identifier; passed as-is to ``get_ncbi_info`` and
            as ``str(taxid)`` to ``gene.GeneMatcher``.
        genes: Iterable of input gene names to match.
        advance: Forwarded unchanged to ``ensure_downloaded``
            (presumably a progress callback -- confirm against caller).

    Returns:
        A 2-tuple ``(map_input_to_ensembl, rows)``:

        * ``map_input_to_ensembl`` -- ``OrderedDict`` keyed by input name;
          the value is the gene's ``'Ensembl'`` db reference when one
          exists, otherwise ``''``.
        * ``rows`` -- one entry per gene yielded by
          ``get_known_genes()``: ``None`` when the gene has no NCBI id,
          else ``(input_name, fields)`` where ``fields`` lists gene id,
          Ensembl reference, display label, locus tag, chromosome,
          description, synonyms and the nomenclature-authority symbol.
    """
    ensure_downloaded(gene.DOMAIN, gene.FILENAME, advance)
    info = get_ncbi_info(taxid)

    matcher = gene.GeneMatcher(str(taxid))
    matcher.genes = genes
    matcher.run_matcher()

    # Every input name starts without an Ensembl reference.
    map_input_to_ensembl = OrderedDict((name, '') for name in genes)
    rows = []

    for matched in matcher.get_known_genes():
        if not matched.ncbi_id:
            # Keep a placeholder so unmatched genes still occupy a slot.
            rows.append(None)
            continue

        record = info.get_gene_by_id(matched.ncbi_id)
        input_name = matched.input_name

        ensembl_ref = ''
        if 'Ensembl' in record.db_refs:
            ensembl_ref = record.db_refs['Ensembl']
            map_input_to_ensembl[input_name] = ensembl_ref

        # Show the user's spelling next to the symbol when they differ.
        if input_name != record.symbol:
            label = record.symbol + " (%s)" % input_name
        else:
            label = record.symbol

        fields = [
            str(record.gene_id),
            str(ensembl_ref),
            label,
            record.locus_tag or "",
            record.chromosome or "",
            record.description or "",
            record.synonyms,
            record.symbol_from_nomenclature_authority or "",
        ]
        rows.append((input_name, fields))

    return map_input_to_ensembl, rows
Exemple #3
0
    def _update_gene_matcher(self):
        """Refresh the gene matcher with the current genes and organism.

        Calls ``_gene_names_from_table()`` first, creates the matcher for
        the selected organism if none exists yet, then assigns
        ``self.input_genes`` and the selected organism to it.
        """
        self._gene_names_from_table()

        matcher = self.gene_matcher
        if not matcher:
            # No matcher yet: build one for the selected organism.
            matcher = gene.GeneMatcher(self._get_selected_organism())
            self.gene_matcher = matcher

        matcher.genes = self.input_genes
        matcher.organism = self._get_selected_organism()
    def test_multiple_hits_scenario(self):
        """An input with several candidate genes must stay unresolved.

        The matched gene is expected to keep its input name, to have no
        match type and no NCBI id, and to expose at least one entry in
        ``possible_hits``.
        """
        organism = '9606'
        input_gene_name = 'HB1'

        matcher = gene.GeneMatcher(organism)
        matcher.genes = [input_gene_name]
        matcher.run_matcher()
        matched = matcher.genes[0]

        self.assertEqual(matched.input_name, input_gene_name)
        # Unresolved: neither a match type nor an id has been assigned.
        self.assertEqual(matched.type_of_match, None)
        self.assertEqual(matched.ncbi_id, None)
        # ...but the candidates are still reported.
        self.assertGreater(len(matched.possible_hits), 0)
    def test_symbol_match_scenario(self):
        """An input that is a gene symbol must resolve via symbol match.

        After matching, the gene is expected to keep its input name, to
        report ``gene._symbol`` as its match type and to carry the
        expected NCBI id.  Once ``load_ncbi_info()`` has run, every
        attribute named in ``gene.GENE_INFO_TAGS`` must be non-``None``.
        """
        organism = '9606'
        input_gene_name = 'SCN5A'
        ncbi_id = 6331

        matcher = gene.GeneMatcher(organism)
        matcher.genes = [input_gene_name]
        matcher.run_matcher()
        matched = matcher.genes[0]

        self.assertEqual(matched.input_name, input_gene_name)
        self.assertEqual(matched.type_of_match, gene._symbol)
        self.assertEqual(matched.ncbi_id, ncbi_id)

        # Loading the NCBI record should populate every info attribute.
        matched.load_ncbi_info()
        for info_tag in gene.GENE_INFO_TAGS:
            self.assertIsNotNone(getattr(matched, info_tag))
Exemple #6
0
    def handle_input(self, data):
        """Store new input data and refresh the dependent widget state.

        When ``data`` is falsy nothing is stored and only
        ``on_input_option_change()`` is called.  Otherwise the data is
        kept, a new gene matcher is created for the selected organism,
        the gene-column combo box is repopulated with the string and
        discrete columns of the data, the tax id / gene-name hints are
        read from the data, and finally ``on_input_option_change()`` is
        called.
        """
        if not data:
            self.on_input_option_change()
            return

        self.input_data = data
        self.gene_matcher = gene.GeneMatcher(self._get_selected_organism())

        # Rebuild the list of columns that may hold gene names.
        self.gene_column_combobox.clear()
        candidates = []
        for attr in data.domain.variables + data.domain.metas:
            if isinstance(attr, (StringVariable, DiscreteVariable)):
                candidates.append(attr)
        self.column_candidates = candidates

        for var in candidates:
            self.gene_column_combobox.addItem(*attributeItem(var))

        # Pick up the hints stored alongside the data.
        self.tax_id = str(data_hints.get_hint(self.input_data, TAX_ID))
        self.use_attr_names = data_hints.get_hint(
            self.input_data, GENE_NAME, default=self.use_attr_names)

        # Keep the selected column index within the candidate list.
        last_index = len(self.column_candidates) - 1
        self.gene_col_index = min(self.gene_col_index, last_index)

        if self.tax_id in self.organisms:
            self.selected_organism = self.organisms.index(self.tax_id)

        self.on_input_option_change()
 def test_types(self):
     """GeneMatcher(9606) -- an int organism -- must raise TypeError."""
     self.assertRaises(TypeError, gene.GeneMatcher, 9606)