def download_gene_sets(tax_id, gene_sets):
    """Request the server file behind every hierarchy in ``gene_sets``.

    For each hierarchy a file name is built with ``geneset.filename`` (using
    ``tax_id``) and passed to ``serverfiles.localpath_download``.

    :param tax_id: taxonomy id used when building each file name.
    :param gene_sets: iterable of gene set hierarchies.
    :return: the ``gene_sets`` argument itself.
    """
    # Snapshot the input first, exactly once, before any request is made.
    requested = list(gene_sets)
    for entry in requested:
        serverfiles.localpath_download(
            geneset.DOMAIN, geneset.filename(entry, tax_id))
    return gene_sets
def ensure_downloaded(callback=None, verbose=True):
    """ Retrieve the taxonomy database if not already downloaded.

    Deprecated: a ``DeprecationWarning`` is emitted on every call.

    :param callback: forwarded to ``serverfiles.localpath_download``.
    :param verbose: forwarded to ``serverfiles.localpath_download``.
    """
    # stacklevel=2 attributes the warning to the caller, which is the code
    # that actually needs to be changed.
    warnings.warn("'ensure_downloaded' is deprecated", DeprecationWarning,
                  stacklevel=2)
    serverfiles.localpath_download(DOMAIN,
                                   FILENAME,
                                   callback=callback,
                                   verbose=verbose)
Exemple #3
0
def download_gene_sets(gene_sets, progress_callback):
    """Request the server file behind every ``(hierarchy, tax_id)`` pair.

    Each pair is turned into a file name with ``geneset.filename`` and passed
    to ``serverfiles.localpath_download``; ``progress_callback.emit`` is handed
    over as the download callback.

    :param gene_sets: iterable of ``(hierarchy, tax_id)`` pairs.
    :param progress_callback: object exposing an ``emit`` attribute.
    :return: ``(tax_id, gene_sets)`` where ``tax_id`` comes from the last pair,
        or ``None`` when ``gene_sets`` is empty.
    """
    # Bind the name up front: with an empty ``gene_sets`` the loop never runs
    # and the return statement would otherwise raise UnboundLocalError.
    tax_id = None

    for hierarchy, tax_id in gene_sets:
        serverfiles.localpath_download(geneset.DOMAIN,
                                       geneset.filename(hierarchy, tax_id),
                                       callback=progress_callback.emit)

    return tax_id, gene_sets
Exemple #4
0
    def setUpClass(cls):
        """Load both marker-gene tables once for the whole test class."""
        super().setUpClass()

        def _fresh_table(file_name):
            # Refresh the server file, resolve its local path, then read it.
            serverfiles.update(SERVER_FILES_DOMAIN, file_name)
            local_path = serverfiles.localpath_download(SERVER_FILES_DOMAIN, file_name)
            return Table.from_file(local_path)

        cls.panglao = _fresh_table("panglao_gene_markers.tab")
        cls.cell_markers = _fresh_table("cellMarker_gene_markers.tab")
    def __init__(self,
                 organism,
                 ontology=None,
                 progress_callback=None,
                 filename=None):
        """Initialise the annotation containers and parse an annotation file.

        :param organism: taxonomy id; stored as ``self.taxid`` and used to
            build the annotation file name when ``filename`` is not given.
        :param ontology: stored as ``self._ontology``.
        :param progress_callback: forwarded to
            ``serverfiles.localpath_download`` when the file has to be looked up.
        :param filename: path of the annotation file to parse. When ``None``
            the path is obtained from ``serverfiles.localpath_download``.
        :raises taxonomy.UnknownSpeciesIdentifier: when looking up the file
            raises ``FileNotFoundError``.
        """
        #: A dictionary mapping a gene (gene_id) to a list of all annotations of that gene.
        self.gene_annotations = defaultdict(list)

        #: A dictionary mapping a GO term id to a list of annotations that are directly annotated to that term
        self.term_anotations = defaultdict(list)

        self.all_annotations = defaultdict(list)

        self._gene_names = None
        self._gene_names_dict = None

        #: A list of all :class:`AnnotationRecords` instances.
        self.annotations = []
        self.header = ''
        self.taxid = organism

        self._ontology = ontology

        if filename is None:
            try:
                filename = serverfiles.localpath_download(
                    DOMAIN,
                    FILENAME_ANNOTATION.format(organism),
                    progress_callback=progress_callback)
            except FileNotFoundError as err:
                # Chain explicitly so the traceback shows the missing file as
                # the cause rather than as an error raised during handling.
                raise taxonomy.UnknownSpeciesIdentifier(organism) from err

        self._parse_file(filename)
Exemple #6
0
    def __init__(self):
        """ Retrieve information about `GEO DataSets <http://www.ncbi.nlm.nih.gov/sites/GDSbrowser>`_.

        The class reads an Orange server file that either already resides on the local computer
        or is retrieved from the Orange server. Calls to this class do not access any NCBI servers.

        The constructor obtains the GEO DataSets information file (gds_info.pickled) through
        ``serverfiles.localpath_download`` and unpickles it into ``self.info`` and ``self.excluded``.

        An instance behaves like a dictionary: the keys are GEO DataSets IDs and each value
        is a dictionary providing various information about that particular data set.

        Example
        --------
            >>> info = GDSInfo()
            >>> list(info.keys())[:5]
            ['GDS10', 'GDS100', 'GDS1001', 'GDS1002', 'GDS1003']
            >>> info['GDS10']['title']
            'Type 1 diabetes gene expression profiling'
            >>> info['GDS10']['platform_organism']
            'Mus musculus'

        """
        info_path = serverfiles.localpath_download(DOMAIN, GDS_INFO_FILENAME)

        with open(info_path, "rb") as handle:
            # The pickle holds a two-item sequence: (info, excluded).
            payload = pickle.load(handle, encoding='latin1')
        self.info, self.excluded = payload
    def _load_data(self):
        """Refresh the selected database file and load it into ``self.data``.

        The table is always loaded from the path returned by
        ``serverfiles.localpath_download`` because that work sits in the
        ``finally`` clause. If ``serverfiles.update`` raises
        ``ConnectionError``, a new ``ConnectionError`` with a user-facing
        message is raised and propagates to the caller *after* the
        ``finally`` clause has run.

        :raises ConnectionError: when ``serverfiles.update`` fails to connect.
        """
        self.available_db_sources = get_available_db_sources()
        file_name = self.available_db_sources[
            self.selected_db_source]['filename']

        try:
            serverfiles.update(serverfiles_domain, file_name)
        except ConnectionError:
            # Re-raised with a friendlier message; the ``finally`` clause
            # below still runs before this exception leaves the method.
            raise ConnectionError('Can not connect to {}. '
                                  'Using only local files.'.format(
                                      serverfiles.server_url))
        finally:
            # Runs on success and on failure alike.
            # NOTE(review): an exception raised in here would replace the
            # ConnectionError above - confirm that is acceptable.
            file_path = serverfiles.localpath_download(serverfiles_domain,
                                                       file_name)
            data = Table(file_path)

            # Enforce a fixed column order: a domain with no attributes and
            # these seven columns as metas, in exactly this sequence.
            old_domain = data.domain
            new_domain = Domain(
                [],
                metas=[
                    old_domain['Organism'],
                    old_domain['Name'],
                    old_domain['Entrez ID'],
                    old_domain['Cell Type'],
                    old_domain['Function'],
                    old_domain['Reference'],
                    old_domain['URL'],
                ],
            )
            data = data.transform(new_domain)
            self.data = data
Exemple #8
0
    def __init__(self):
        """Open a SQLite connection to the file resolved through ``serverfiles``.

        :raises GeneInfoFileNotFound: when the resolved path is not an
            existing file.
        """
        db_path = serverfiles.localpath_download(DOMAIN, FILENAME)

        # Guard clause: bail out before trying to connect to a missing file.
        if not os.path.isfile(db_path):
            raise GeneInfoFileNotFound(db_path)

        self._db_con = sqlite3.connect(db_path)
 def setUpClass(cls):
     """Prepare the shared tables: expression data, human marker genes, iris."""
     super().setUpClass()
     cls.data = Table("https://datasets.biolab.si/sc/aml-1k.tab.gz")

     markers_file = serverfiles.localpath_download(
         "marker_genes", "panglao_gene_markers.tab")
     # Keep only the rows whose "Organism" column equals "Human".
     human_only = Values(
         [FilterString("Organism", FilterString.Equal, "Human")])
     cls.genes = human_only(Table(markers_file))

     cls.iris = Table("iris")
    def __init__(self):
        """
        In orange-bio environment we typically work with organisms commonly used in molecular research projects
        listed here: https://www.ncbi.nlm.nih.gov/taxonomy

        The taxonomy database path is resolved through ``serverfiles`` and
        wrapped in a ``TaxonomyDB`` stored as ``self._tax``.
        """
        taxonomy_db_path = serverfiles.localpath_download(DOMAIN, FILENAME)
        self._tax = TaxonomyDB(taxonomy_db_path)
Exemple #11
0
    def load(cls, progress_callback=None):
        """ Build an instance from the ontology file resolved through ``serverfiles``.

        The path comes from ``serverfiles.localpath_download(DOMAIN,
        FILENAME_ONTOLOGY)`` and is passed to the class constructor.

        :param progress_callback: handed to the constructor; it is not passed
            to the download call.
        :return: a new instance of ``cls``.
        """
        ontology_path = serverfiles.localpath_download(DOMAIN, FILENAME_ONTOLOGY)
        instance = cls(ontology_path, progress_callback=progress_callback)
        return instance
    def __init__(self, local_database_path=None):
        """Pick the database location and load the file found there.

        :param local_database_path: directory holding ``FILENAME``; when
            ``None``, ``self.DEFAULT_DATABASE_PATH`` is used and the file path
            is resolved through ``serverfiles`` instead of being joined by hand.
        """
        if local_database_path is None:
            local_database_path = self.DEFAULT_DATABASE_PATH
        self.local_database_path = local_database_path

        uses_default_location = (
            self.local_database_path == self.DEFAULT_DATABASE_PATH)
        if uses_default_location:
            source_file = serverfiles.localpath_download(DOMAIN, FILENAME)
        else:
            source_file = os.path.join(self.local_database_path, FILENAME)

        self.load(source_file)
    def __init__(self, file_path=None):
        """  A collection of Dictybase mutants as a dictionary of :obj:`DictyMutant` objects.

        :param file_path: path of a JSON file holding a list of mutant
            records. When ``None`` the path is resolved through
            ``serverfiles.localpath_download``.
        """
        if file_path is None:
            file_path = serverfiles.localpath_download(DOMAIN,
                                                       PHENOTYPES_FILENAME)

        # Decode explicitly as UTF-8 so that parsing does not depend on the
        # platform's locale encoding.
        with open(file_path, 'r', encoding='utf-8') as fp:
            records = json.load(fp)

        _mutants = [DictyMutant(record) for record in records]
        # Each mutant is stored under itself as the key.
        self._mutants = {m: m for m in _mutants}
    def setUpClass(cls):
        """Set up shared widget-test state: input signal data and human marker genes."""
        super().setUpClass()
        Variable._clear_all_caches()
        cls._init_data()

        # Signal configuration; ``cls.data`` is read only after ``_init_data``.
        cls.signal_name = "Reference Data"
        cls.signal_data = cls.data
        cls.same_input_output_domain = False

        markers_file = serverfiles.localpath_download("marker_genes", "panglao_gene_markers.tab")
        # Keep only the rows whose "Organism" column equals "Human".
        human_only = Values([FilterString("Organism", FilterString.Equal, "Human")])
        human_markers = human_only(Table(markers_file))
        human_markers.attributes[TAX_ID] = "9606"
        cls.genes = human_markers
Exemple #15
0
    def __init__(self, filename=None, progress_callback=None):
        """Create empty ontology containers and parse an ontology file.

        :param filename: path of the file to parse; when ``None`` the path is
            resolved through ``serverfiles.localpath_download``.
        :param progress_callback: passed on to ``self.parse_file``.
        """
        self.terms = {}
        self.typedefs = {}
        self.instances = {}
        self.slims_subset = set()
        self.alias_mapper = {}
        self.reverse_alias_mapper = defaultdict(set)
        self.header = ""

        # Resolve the default file only when the caller did not supply one;
        # parsing is the same in both cases.
        if filename is None:
            filename = serverfiles.localpath_download(DOMAIN, FILENAME_ONTOLOGY)
        self.parse_file(filename, progress_callback)
    def _load_data(self):
        """Discover the available database sources and load the selected one.

        Source descriptions are read from the server; on a connection error
        the ``serverfiles.allinfo`` listing is used instead and the
        ``using_local_files`` warning is shown. When at least one source is
        known, the selected file is refreshed (network errors ignored),
        resolved to a local path, loaded and stored in ``self.data`` with a
        fixed order of meta columns.
        """
        self.Warning.using_local_files.clear()

        sources = {}
        try:
            sources.update(
                serverfiles.ServerFiles().allinfo(serverfiles_domain))
        except requests.exceptions.ConnectionError:
            sources.update(serverfiles.allinfo(serverfiles_domain))
            self.Warning.using_local_files()

        # Key each source by the part of its title after the last ': '.
        self.available_db_sources = {
            info.get('title').split(': ')[-1]: info
            for info in sources.values()
        }

        if not self.available_db_sources:
            return

        selected = self.available_db_sources[self.selected_db_source]
        file_name = selected['filename']

        try:
            serverfiles.update(serverfiles_domain, file_name)
        except requests.exceptions.ConnectionError:
            # Updating is best effort; network errors are ignored.
            pass

        # A connection error here is unexpected and simply propagates.
        file_path = serverfiles.localpath_download(serverfiles_domain, file_name)

        table = Table(file_path)

        # Enforce the column order: no attributes, seven metas in this order.
        source_domain = table.domain
        meta_order = (
            'Organism',
            'Name',
            'Entrez ID',
            'Cell Type',
            'Function',
            'Reference',
            'URL',
        )
        ordered_domain = Domain(
            [], metas=[source_domain[name] for name in meta_order])
        self.data = table.transform(ordered_domain)
    def __init__(self, local_database_path=None):
        """  A collection of Dictybase mutants as a dictionary of :obj:`DictyMutant` objects.

        :param local_database_path: A path for storing D. dictyostelium mutants objects. If `None` then
                                    a default database path is used.
        """

        self.local_database_path = local_database_path \
            if local_database_path is not None else self.DEFAULT_DATABASE_PATH

        if not os.path.exists(self.local_database_path):
            # makedirs also creates missing parent directories, and
            # exist_ok tolerates another process creating the directory
            # between the check above and this call.
            os.makedirs(self.local_database_path, exist_ok=True)

        mutants_path = serverfiles.localpath_download(DOMAIN, PHENOTYPES_FILENAME)
        # Use a context manager so the file handle is closed deterministically
        # instead of being left to the garbage collector.
        # NOTE(review): unpickling runs arbitrary code from the file; this is
        # only safe while the server file is trusted.
        with open(mutants_path, "rb") as fp:
            self._mutants = pickle.load(fp)
    def __init__(self):
        """Read the homologene table and index its genes.

        Builds ``self._homologs`` (gene id -> gene) from the lines of the file
        and ``self._homologs_by_group`` (homology group id -> list of genes)
        from the values of the first mapping.
        """
        self.file_path: str = serverfiles.localpath_download(
            'homologene', 'homologene.tab')

        by_gene_id = {}
        with open(self.file_path, 'r') as fp:
            for line in fp:
                gene = _from_data_to_gene(line)
                # A later line with the same gene id replaces the earlier one.
                by_gene_id[gene.gene_id] = gene
        self._homologs: Dict[str, Gene] = by_gene_id

        by_group = defaultdict(list)
        for gene in self._homologs.values():
            by_group[gene.homology_group_id].append(gene)
        self._homologs_by_group: Dict[str, List[Gene]] = by_group
    def __init__(self):
        """ Retrieve information about `GEO DataSets <http://www.ncbi.nlm.nih.gov/sites/GDSbrowser>`_.

        The class reads an Orange server file that either already resides on the local computer
        or is retrieved from the Orange server. Calls to this class do not access any NCBI servers.

        The constructor obtains the GEO DataSets information file (gds_info.pickled) through
        ``serverfiles.localpath_download`` and unpickles it into ``self.info`` and ``self.excluded``.

        An instance behaves like a dictionary: the keys are GEO DataSets IDs and each value
        is a dictionary providing various information about that particular data set.

        """
        info_path = serverfiles.localpath_download(DOMAIN, GDS_INFO_FILENAME)

        with open(info_path, "rb") as handle:
            # The pickle holds a two-item sequence: (info, excluded).
            payload = pickle.load(handle, encoding='latin1')
        self.info, self.excluded = payload
Exemple #20
0
    def load_matcher_file(self, domain, filename):
        """Load a pickled gene-matcher dictionary.

        :param domain: server files domain of the matcher file.
        :param filename: name of the matcher file.
        :return: the unpickled dictionary; when ``self._case_insensitive`` is
            true, a copy whose symbol, synonym and nomenclature maps also
            contain a lower-cased variant of every key. ``None`` when
            resolving the file raises one of the handled network errors.
        """

        def _with_lowercase_keys(raw):
            # Sources, gene ids and loci are reused as they are; the three
            # name-based maps are rebuilt below.
            folded = {
                MAP_SOURCES: raw[MAP_SOURCES],
                MAP_GENE_ID: raw[MAP_GENE_ID],
                MAP_LOCUS: raw[MAP_LOCUS],
                MAP_SYNONYMS: defaultdict(list),
                MAP_SYMBOL: defaultdict(list),
                MAP_NOMENCLATURE: defaultdict(list)
            }

            for section in (MAP_SYMBOL, MAP_SYNONYMS, MAP_NOMENCLATURE):
                rebuilt = folded[section]
                for key, value in raw[section].items():
                    # Force a string: string methods are applied to the key.
                    text_key = ensure_type(str(key), str)
                    rebuilt[text_key] = value
                    rebuilt[text_key.lower()] = value

            return folded

        try:
            # Resolving the path is where a download may be triggered.
            # NOTE: refreshing the file before use is currently disabled.
            file_path = serverfiles.localpath_download(domain, filename)
        except (ConnectTimeout, RequestException, ConnectionError) as e:
            # Deliberately not re-raised: report the error and give up.
            print(e)
            return None

        with open(file_path, 'rb') as pickle_file:
            if self._case_insensitive:
                return _with_lowercase_keys(pickle.load(pickle_file))
            return pickle.load(pickle_file)
Exemple #21
0
def load_gene_summary(tax_d: str,
                      genes: List[Optional[str]]) -> List[Optional[Gene]]:
    """Look up ``gene_info`` rows for the given gene ids.

    :param tax_d: taxonomy id; the database file is ``'<tax_d>.sqlite'``.
    :param genes: gene ids to look up; falsy entries (``None``, ``''``) are
        allowed and yield ``None`` in the result.
    :return: a list aligned with ``genes`` holding the matching ``Gene`` or
        ``None`` where no row was found.
    """
    gene_db_path = serverfiles.localpath_download(DOMAIN, f'{tax_d}.sqlite')

    # filter NoneTypes (and other falsy ids); they can never match a row
    _genes = [g for g in genes if g]

    # Ids are bound as parameters instead of being pasted into the SQL text,
    # so unusual ids can neither break the statement nor inject SQL. Queries
    # are issued in chunks to stay below SQLite's bound-parameter limit
    # (999 in older builds).
    # NOTE(review): assumes the gene_id column has a declared type affinity,
    # so a bound string compares like the former literal - confirm schema.
    chunk_size = 500

    gene_map: Dict[str, Gene] = {}
    with contextlib.closing(sqlite3.connect(gene_db_path)) as con:
        with con:
            for start in range(0, len(_genes), chunk_size):
                chunk = _genes[start:start + chunk_size]
                placeholders = ','.join('?' * len(chunk))
                query = f'SELECT * FROM gene_info WHERE gene_id IN ({placeholders})'
                for gene_info in con.execute(query, chunk).fetchall():
                    gene = Gene()
                    gene.load_attributes(gene_info)
                    gene_map[gene.gene_id] = gene

    return [gene_map.get(gid, None) if gid else None for gid in genes]
    def _source_changed(self) -> None:
        """
        React to a change of the selected source: refresh the source file,
        resolve its local path and load it into ``self.data``.

        Nothing happens when ``self.available_sources`` is empty.
        """
        if not self.available_sources:
            return

        source_info = self.available_sources[self.selected_source]
        file_name = source_info['filename']

        try:
            serverfiles.update(SERVER_FILES_DOMAIN, file_name)
        except requests.exceptions.ConnectionError:
            # Updating is best effort; network errors are ignored.
            pass

        # A connection error here is unexpected and simply propagates.
        local_file = serverfiles.localpath_download(SERVER_FILES_DOMAIN, file_name)
        self.data = Table.from_file(local_file)
def load_gene_sets(hierarchy, tax_id):
    # type: (Tuple[str, str], str) -> GeneSets
    """ Initialize gene sets from a given hierarchy and organism.

    :param tuple hierarchy: gene set hierarchy.
    :param str tax_id: taxonomy id used together with the hierarchy to build
        the file name.
    :rtype: :obj:`GeneSets`

    Example
    --------
    Gene sets provided with Orange are organized hierarchically:
        >>> list_of_genesets = list_all(organism='10090')
            [(('KEGG', 'Pathways'), '10090'),
             (('KEGG', 'pathways'), '10090'),
             (('GO', 'biological_process'), '10090'),
             (('GO', 'molecular_function'), '10090'),
             (('GO', 'cellular_component'), '10090')]
        >>> load_gene_sets(*list_of_genesets[0])

    """
    gmt_name = filename(hierarchy, tax_id)
    gmt_path = serverfiles.localpath_download(DOMAIN, gmt_name)
    return GeneSets.from_gmt_file_format(gmt_path)
Exemple #24
0
import bz2
import pickle

from collections import defaultdict
from server_update import *
from server_update.tests.test_GeneInfo import GeneInfo
from orangecontrib.bioinformatics.ncbi.gene import (
    DOMAIN, FILENAME, gene_matcher_tuple, MATCHER_FILENAME, MATCHER_TITLE,
    MATCHER_TAGS, MAP_GENE_IDS, MAP_SOURCES, MAP_SYMBOLS, MAP_SYNONYMS,
    MAP_LOCUS)

from orangecontrib.bioinformatics.ncbi.gene.utils import parse_sources, parse_synonyms, GeneInfoDB
from orangecontrib.bioinformatics.ncbi.taxonomy import common_taxids, common_taxid_to_name
from orangecontrib.bioinformatics.utils import serverfiles

# Resolve the gene info database file when this module is first executed.
serverfiles.localpath_download(DOMAIN, FILENAME)

# Positional indexes of the fields in a gene record; ``parse_gene_record``
# below indexes ``gene_record`` with them.
tax_id, gene_id, symbol, synonyms, source, locus_tag = 0, 1, 2, 3, 4, 5

# NOTE(review): ``sf_local``, ``sf_temp``, ``os`` and ``create_folder`` are not
# imported by name here - presumably they come from the star import of
# ``server_update``; confirm.
domain_path = sf_local.localpath(DOMAIN)
temp_path = os.path.join(domain_path, sf_temp)

db_path = os.path.join(domain_path, FILENAME)

# NOTE(review): the nested temp folder is created before its parent; this
# only works if ``create_folder`` creates intermediate directories - confirm.
create_folder(temp_path)
create_folder(domain_path)


def parse_gene_record(parent_tax, mapper, gene_record):

    gene = gene_matcher_tuple(parent_tax, gene_record[gene_id],
Exemple #25
0
    def load_matcher_file(cls, domain, filename):
        """Unpickle and return the matcher file ``filename`` from ``domain``.

        The local path is resolved through ``serverfiles.localpath_download``,
        which is where a download may be triggered.
        """
        matcher_path = serverfiles.localpath_download(domain, filename)

        with open(matcher_path, 'rb') as handle:
            matcher = pickle.load(handle)
        return matcher
def load_gene_sets(hierarchy):
    """Load the gene sets stored in the GMT file that ``hierarchy`` maps to.

    :param hierarchy: sequence that is unpacked into the arguments of
        ``filename`` to obtain the file name.
    :return: the result of ``GeneSets.from_gmt_file_format`` for that file.
    """
    gmt_name = filename(*hierarchy)
    gmt_path = serverfiles.localpath_download(DOMAIN, gmt_name)
    return GeneSets.from_gmt_file_format(gmt_path)
def ensure_downloaded(domain, filename, advance=None):
    """Resolve ``filename`` in ``domain`` through ``serverfiles.localpath_download``.

    :param domain: server files domain, passed through unchanged.
    :param filename: file name, passed through unchanged.
    :param advance: passed on as the ``callback`` keyword argument.

    The path returned by ``localpath_download`` is discarded.
    """
    serverfiles.localpath_download(domain, filename, callback=advance)
Exemple #28
0
 def _gene_db_path(self):
     """Return the local path of the ``'<tax_id>.sqlite'`` file for ``self.tax_id``."""
     db_file_name = f'{self.tax_id}.sqlite'
     return serverfiles.localpath_download(DOMAIN, db_file_name)
Exemple #29
0
    def onDeleteWidget(self):
        """Call ``self.shutdown()`` and then the parent class's ``onDeleteWidget``."""
        # Shut down first, before the parent class performs its own cleanup.
        self.shutdown()
        super().onDeleteWidget()


if __name__ == "__main__":
    # Manual preview: feeds the widget a PCA-projected reference set, a
    # secondary sample of the same table and the human marker genes.
    from Orange.projection import PCA
    from orangecontrib.bioinformatics.utils import serverfiles

    expression = Table("https://datasets.orange.biolab.si/sc/aml-1k.tab.gz")
    expression.attributes[TAX_ID] = "9606"

    # Every second row serves as reference data.
    reference = expression[::2]
    projection = PCA(n_components=2)(reference)(reference)

    # Append the projection's attributes to the reference metas.
    reference_domain = reference.domain
    extended_metas = chain(reference_domain.metas, projection.domain.attributes)
    reference = reference.transform(
        Domain(reference_domain.attributes, reference_domain.class_vars, extended_metas)
    )

    markers_file = serverfiles.localpath_download("marker_genes", "panglao_gene_markers.tab")
    # Keep only the rows whose "Organism" column equals "Human".
    human_only = Values([FilterString("Organism", FilterString.Equal, "Human")])
    marker_genes = human_only(Table(markers_file))
    marker_genes.attributes[TAX_ID] = "9606"

    WidgetPreview(OWAnnotateProjection).run(
        set_data=reference, set_secondary_data=expression[1:200:2], set_genes=marker_genes
    )