def download_gene_sets(tax_id, gene_sets):
    """Resolve the local file of every requested gene set for one organism.

    :param tax_id: taxonomy identifier used to build each gene set file name.
    :param gene_sets: iterable of gene set hierarchies.
    :return: the ``gene_sets`` argument itself.
    """
    # Take a snapshot of the hierarchies first; files that are already
    # present locally are not fetched again by ``localpath_download``.
    pending = list(gene_sets)
    for hierarchy in pending:
        set_file = geneset.filename(hierarchy, tax_id)
        serverfiles.localpath_download(geneset.DOMAIN, set_file)
    return gene_sets
def ensure_downloaded(callback=None, verbose=True):
    """
    Retrieve the taxonomy database if not already downloaded.

    Deprecated: a :class:`DeprecationWarning` is emitted on every call.

    :param callback: forwarded to ``serverfiles.localpath_download`` as ``callback``.
    :param verbose: forwarded to ``serverfiles.localpath_download`` as ``verbose``.
    """
    # stacklevel=2 attributes the warning to the code that called this
    # deprecated function instead of to this line.
    warnings.warn("'ensure_downloaded' is deprecated", DeprecationWarning, stacklevel=2)
    serverfiles.localpath_download(DOMAIN, FILENAME, callback=callback, verbose=verbose)
def download_gene_sets(gene_sets, progress_callback):
    """Resolve the local file of every requested gene set.

    Files that are already present locally are not fetched again by
    ``localpath_download``.

    :param gene_sets: iterable of ``(hierarchy, tax_id)`` pairs.
    :param progress_callback: object whose ``emit`` attribute is passed to the
        download call as ``callback``; only accessed when there is at least
        one gene set to process.
    :return: tuple ``(tax_id, gene_sets)`` where ``tax_id`` is the taxonomy id
        of the last processed pair, or ``None`` when ``gene_sets`` is empty.
    """
    # Bind tax_id up front: with an empty ``gene_sets`` the loop never runs
    # and the return statement would otherwise hit an unbound local.
    tax_id = None
    for hierarchy, tax_id in gene_sets:
        serverfiles.localpath_download(
            geneset.DOMAIN,
            geneset.filename(hierarchy, tax_id),
            callback=progress_callback.emit,
        )
    return tax_id, gene_sets
def setUpClass(cls):
    """Code executed only once for all tests"""
    super().setUpClass()

    # (class attribute, server file name) pairs, processed in this order.
    marker_files = (
        ("panglao", "panglao_gene_markers.tab"),
        ("cell_markers", "cellMarker_gene_markers.tab"),
    )
    for attribute, marker_file in marker_files:
        # Refresh the file first, then resolve its local path and load it.
        serverfiles.update(SERVER_FILES_DOMAIN, marker_file)
        local_copy = serverfiles.localpath_download(SERVER_FILES_DOMAIN, marker_file)
        setattr(cls, attribute, Table.from_file(local_copy))
def __init__(self, organism, ontology=None, progress_callback=None, filename=None):
    """Set up empty annotation containers and parse the annotation file.

    :param organism: stored as ``self.taxid`` and used to format the
        annotation file name when ``filename`` is not given.
    :param ontology: stored as ``self._ontology``.
    :param progress_callback: forwarded to the download call.
    :param filename: annotation file to parse; when ``None`` the path is
        obtained through ``serverfiles.localpath_download``.
    :raises taxonomy.UnknownSpeciesIdentifier: when obtaining the file raises
        :class:`FileNotFoundError`.
    """
    self.taxid = organism
    self._ontology = ontology
    self.header = ''

    #: A list of all :class:`AnnotationRecords` instances.
    self.annotations = []
    #: A dictionary mapping a gene (gene_id) to a set of all annotations of that gene.
    self.gene_annotations = defaultdict(list)
    #: A dictionary mapping a GO term id to a set of annotations that are directly annotated to that term
    self.term_anotations = defaultdict(list)
    self.all_annotations = defaultdict(list)

    self._gene_names = None
    self._gene_names_dict = None

    if filename is None:
        annotation_name = FILENAME_ANNOTATION.format(organism)
        try:
            # NOTE(review): other callers of localpath_download pass the
            # callback as ``callback=``; confirm ``progress_callback=`` is
            # accepted by the download API.
            filename = serverfiles.localpath_download(
                DOMAIN, annotation_name, progress_callback=progress_callback
            )
        except FileNotFoundError:
            raise taxonomy.UnknownSpeciesIdentifier(organism)

    self._parse_file(filename)
def __init__(self):
    """
    Retrieve information about `GEO DataSets
    <http://www.ncbi.nlm.nih.gov/sites/GDSbrowser>`_.

    The class accesses the Orange server file that either resides on the
    local computer or is automatically retrieved from the Orange server.
    Calls to this class do not access any of NCBI's servers.

    The constructor resolves the GEO DataSets information file
    (gds_info.pickled); the local copy is checked first and the file is
    downloaded from the Orange server only when needed.

    An instance behaves like a dictionary: the keys are GEO DataSets IDs,
    and each value is a dictionary providing various information about the
    particular data set.

    Example
    --------
        >>> info = GDSInfo()
        >>> list(info.keys())[:5]
        ['GDS10', 'GDS100', 'GDS1001', 'GDS1002', 'GDS1003']
        >>> info['GDS10']['title']
        'Type 1 diabetes gene expression profiling'
        >>> info['GDS10']['platform_organism']
        'Mus musculus'

    """
    info_path = serverfiles.localpath_download(DOMAIN, GDS_INFO_FILENAME)
    with open(info_path, "rb") as handle:
        # The pickle holds a pair: (info, excluded).
        payload = pickle.load(handle, encoding='latin1')
    self.info, self.excluded = payload
def _load_data(self):
    """Load the marker table of the selected database source into ``self.data``.

    The file is refreshed through ``serverfiles.update`` first; a
    :class:`ConnectionError` from that step is re-raised with a message naming
    the server, after the local path has still been resolved in ``finally``.
    """
    self.available_db_sources = get_available_db_sources()
    source_entry = self.available_db_sources[self.selected_db_source]
    file_name = source_entry['filename']

    try:
        serverfiles.update(serverfiles_domain, file_name)
    except ConnectionError:
        message = 'Can not connect to {}. Using only local files.'
        raise ConnectionError(message.format(serverfiles.server_url))
    finally:
        file_path = serverfiles.localpath_download(serverfiles_domain, file_name)

    loaded = Table(file_path)

    # enforce order: keep only these columns, as metas, in this sequence
    column_order = (
        'Organism',
        'Name',
        'Entrez ID',
        'Cell Type',
        'Function',
        'Reference',
        'URL',
    )
    source_domain = loaded.domain
    ordered_domain = Domain([], metas=[source_domain[name] for name in column_order])
    self.data = loaded.transform(ordered_domain)
def __init__(self):
    """Open a SQLite connection to the gene info database file.

    :raises GeneInfoFileNotFound: when the resolved path is not an existing file.
    """
    db_path = serverfiles.localpath_download(DOMAIN, FILENAME)

    # Guard clause: refuse to connect when the database file is missing.
    if not os.path.isfile(db_path):
        raise GeneInfoFileNotFound(db_path)

    self._db_con = sqlite3.connect(db_path)
def setUpClass(cls):
    """Load the shared test tables once for the whole test class."""
    super().setUpClass()
    cls.data = Table("https://datasets.biolab.si/sc/aml-1k.tab.gz")

    markers_path = serverfiles.localpath_download(
        "marker_genes", "panglao_gene_markers.tab")
    # Keep only the marker rows whose "Organism" equals "Human".
    human_only = Values([FilterString("Organism", FilterString.Equal, "Human")])
    cls.genes = human_only(Table(markers_path))

    cls.iris = Table("iris")
def __init__(self):
    """
    In orange-bio environment we typically work with organisms
    commonly used in molecular research projects listed here:
    https://www.ncbi.nlm.nih.gov/taxonomy
    """
    # Resolving the local path ensures the taxonomy db is downloaded.
    taxonomy_db_path = serverfiles.localpath_download(DOMAIN, FILENAME)
    taxonomy_db = TaxonomyDB(taxonomy_db_path)
    self._tax = taxonomy_db
def load(cls, progress_callback=None):
    """Build an instance from the ontology file resolved through serverfiles.

    The file is downloaded when it is not found locally.

    :param progress_callback: passed on to the class constructor.
    :return: a new instance of ``cls``.
    """
    ontology_path = serverfiles.localpath_download(DOMAIN, FILENAME_ONTOLOGY)
    instance = cls(ontology_path, progress_callback=progress_callback)
    return instance
def __init__(self, local_database_path=None):
    """Load the database file from the default or a custom directory.

    :param local_database_path: directory containing the database file;
        ``None`` selects ``self.DEFAULT_DATABASE_PATH``.
    """
    if local_database_path is None:
        local_database_path = self.DEFAULT_DATABASE_PATH
    self.local_database_path = local_database_path

    uses_default_location = self.local_database_path == self.DEFAULT_DATABASE_PATH
    if uses_default_location:
        # Default location: let serverfiles resolve the file.
        database_file = serverfiles.localpath_download(DOMAIN, FILENAME)
    else:
        # Custom location: the file is expected directly inside that directory.
        database_file = os.path.join(self.local_database_path, FILENAME)

    self.load(database_file)
def __init__(self, file_path=None):
    """
    A collection of Dictybase mutants as a dictionary of :obj:`DictyMutant` objects.

    :param file_path: JSON file with the mutant records; when ``None`` the
        path is resolved through ``serverfiles.localpath_download``.
    """
    if file_path is None:
        file_path = serverfiles.localpath_download(DOMAIN, PHENOTYPES_FILENAME)

    with open(file_path, 'r') as fp:
        loaded_mutants = list(map(DictyMutant, json.load(fp)))

    # Every mutant is stored under itself as the key.
    self._mutants = dict(zip(loaded_mutants, loaded_mutants))
def setUpClass(cls):
    """Prepare the shared input data and the human marker gene table once."""
    super().setUpClass()
    Variable._clear_all_caches()
    cls._init_data()

    cls.signal_name = "Reference Data"
    cls.signal_data = cls.data
    cls.same_input_output_domain = False

    markers_path = serverfiles.localpath_download("marker_genes", "panglao_gene_markers.tab")
    # Keep only the marker rows whose "Organism" equals "Human".
    human_only = Values([FilterString("Organism", FilterString.Equal, "Human")])
    human_markers = human_only(Table(markers_path))
    cls.genes = human_markers
    cls.genes.attributes[TAX_ID] = "9606"
def __init__(self, filename=None, progress_callback=None):
    """Create empty ontology containers and parse the ontology file.

    :param filename: ontology file to parse; when ``None`` the path is
        resolved through ``serverfiles.localpath_download``.
    :param progress_callback: passed to ``parse_file``.
    """
    self.header = ""
    self.terms = {}
    self.typedefs = {}
    self.instances = {}
    self.slims_subset = set()
    self.alias_mapper = {}
    self.reverse_alias_mapper = defaultdict(set)

    # Both cases end in the same parse call; only the file source differs.
    if filename is None:
        filename = serverfiles.localpath_download(DOMAIN, FILENAME_ONTOLOGY)
    self.parse_file(filename, progress_callback)
def _load_data(self):
    """Discover the available marker sources and load the selected one.

    Source metadata is read from the server; on a connection error the
    module-level ``serverfiles.allinfo`` is used instead and the
    ``using_local_files`` warning is shown. When at least one source is
    available, the selected file is loaded into ``self.data``.
    """
    self.Warning.using_local_files.clear()

    found_sources = {}
    try:
        source_info = serverfiles.ServerFiles().allinfo(serverfiles_domain)
    except requests.exceptions.ConnectionError:
        source_info = serverfiles.allinfo(serverfiles_domain)
        found_sources.update(source_info)
        self.Warning.using_local_files()
    else:
        found_sources.update(source_info)

    # Key each source by the part of its title after the last ': '.
    sources_by_title = {}
    for item in found_sources.values():
        sources_by_title[item.get('title').split(': ')[-1]] = item
    self.available_db_sources = sources_by_title

    if not self.available_db_sources:
        return

    file_name = self.available_db_sources[self.selected_db_source]['filename']

    try:
        serverfiles.update(serverfiles_domain, file_name)
    except requests.exceptions.ConnectionError:
        # try to update file. Ignore network errors.
        pass

    try:
        file_path = serverfiles.localpath_download(serverfiles_domain, file_name)
    except requests.exceptions.ConnectionError:
        # Unexpected error.
        raise

    loaded = Table(file_path)

    # enforce order: keep only these columns, as metas, in this sequence
    column_order = (
        'Organism',
        'Name',
        'Entrez ID',
        'Cell Type',
        'Function',
        'Reference',
        'URL',
    )
    source_domain = loaded.domain
    ordered_domain = Domain([], metas=[source_domain[name] for name in column_order])
    self.data = loaded.transform(ordered_domain)
def __init__(self, local_database_path=None):
    """
    A collection of Dictybase mutants as a dictionary of :obj:`DictyMutant` objects.

    :param local_database_path: A path for storing D. dictyostelium mutants objects.
        If `None` then a default database path is used.
    """
    self.local_database_path = local_database_path \
        if local_database_path is not None else self.DEFAULT_DATABASE_PATH

    if not os.path.exists(self.local_database_path):
        os.mkdir(self.local_database_path)

    phenotypes_path = serverfiles.localpath_download(DOMAIN, PHENOTYPES_FILENAME)
    # Open through a context manager so the file handle is always closed,
    # even when unpickling fails.
    with open(phenotypes_path, "rb") as phenotypes_file:
        # NOTE(review): pickle.load can execute arbitrary code; this relies
        # on the downloaded file coming from a trusted server.
        self._mutants = pickle.load(phenotypes_file)
def __init__(self):
    """Read the homologene table and index its genes by id and by group."""
    self.file_path: str = serverfiles.localpath_download(
        'homologene', 'homologene.tab')

    # One gene per line of the file, keyed by its gene id.
    genes_by_id = {}
    with open(self.file_path, 'r') as fp:
        for parsed in [_from_data_to_gene(line) for line in fp.readlines()]:
            genes_by_id[parsed.gene_id] = parsed
    self._homologs: Dict[str, Gene] = genes_by_id

    # Collect the genes that share a homology group id.
    grouped = defaultdict(list)
    for gene in self._homologs.values():
        grouped[gene.homology_group_id].append(gene)
    self._homologs_by_group: Dict[str, List[Gene]] = grouped
def __init__(self):
    """
    Retrieve information about `GEO DataSets
    <http://www.ncbi.nlm.nih.gov/sites/GDSbrowser>`_.

    The class accesses the Orange server file that either resides on the
    local computer or is automatically retrieved from the Orange server.
    Calls to this class do not access any of NCBI's servers.

    The constructor resolves the GEO DataSets information file
    (gds_info.pickled); the local copy is checked first and the file is
    downloaded from the Orange server only when needed.

    An instance behaves like a dictionary: the keys are GEO DataSets IDs,
    and each value is a dictionary providing various information about the
    particular data set.

    """
    local_info_file = serverfiles.localpath_download(DOMAIN, GDS_INFO_FILENAME)
    with open(local_info_file, "rb") as stream:
        # The pickle holds a pair: (info, excluded).
        unpickled = pickle.load(stream, encoding='latin1')
    self.info, self.excluded = unpickled
def load_matcher_file(self, domain, filename):
    """Load a pickled gene matcher mapping.

    :param domain: serverfiles domain of the matcher file.
    :param filename: name of the matcher file.
    :return: the unpickled mapping; when ``self._case_insensitive`` is true,
        a copy whose symbol, synonym and nomenclature maps also contain
        lower-cased keys. ``None`` when resolving the file raised a
        connection-related error (the error is printed, not raised).
    """
    try:
        # this starts download if files are not on local machine
        file_path = serverfiles.localpath_download(domain, filename)
        # NOTE: refreshing the file before use was left disabled:
        # serverfiles.update(domain, filename)
    except (ConnectTimeout, RequestException, ConnectionError) as download_error:
        # Do not raise exception.
        print(download_error)
        return None

    def case_insensitive_keys(matcher_dict):
        updated_dict = {
            MAP_SOURCES: matcher_dict[MAP_SOURCES],
            MAP_GENE_ID: matcher_dict[MAP_GENE_ID],
            MAP_LOCUS: matcher_dict[MAP_LOCUS],
            MAP_SYNONYMS: defaultdict(list),
            MAP_SYMBOL: defaultdict(list),
            MAP_NOMENCLATURE: defaultdict(list),
        }

        # Each of these maps gets every key twice: as given and lower-cased.
        for map_name in (MAP_SYMBOL, MAP_SYNONYMS, MAP_NOMENCLATURE):
            folded = updated_dict[map_name]
            for key, value in matcher_dict[map_name].items():
                # ensure string, we are using string methods (upper, lower)
                key = ensure_type(str(key), str)
                folded[key] = value
                folded[key.lower()] = value

        return updated_dict

    with open(file_path, 'rb') as pickle_file:
        fold_case = self._case_insensitive
        matcher = pickle.load(pickle_file)
        if fold_case:
            return case_insensitive_keys(matcher)
        return matcher
def load_gene_summary(tax_d: str, genes: List[Optional[str]]) -> List[Optional[Gene]]:
    """Look up gene records for the given gene ids in an organism's database.

    :param tax_d: taxonomy id; selects the ``<tax_d>.sqlite`` database file.
    :param genes: gene ids to look up; falsy entries are not queried.
    :return: a list aligned with ``genes`` holding the matching :class:`Gene`,
        or ``None`` where the id was falsy or no record was found.
    """
    gene_db_path = serverfiles.localpath_download(DOMAIN, f'{tax_d}.sqlite')

    # filter NoneTypes
    _genes = [g for g in genes if g]

    # Ids are bound as query parameters instead of being pasted into the SQL
    # text. Batching keeps each statement below SQLite's limit on the number
    # of bound variables (999 in older builds).
    batch_size = 500
    gene_map: Dict[str, Gene] = {}

    with contextlib.closing(sqlite3.connect(gene_db_path)) as con:
        for start in range(0, len(_genes), batch_size):
            batch = _genes[start:start + batch_size]
            placeholders = ','.join('?' * len(batch))
            query = f'SELECT * FROM gene_info WHERE gene_id IN ({placeholders})'
            # NOTE(review): ids are bound as given (strings per the type
            # hints); assumes gene_id has a declared column type so SQLite
            # applies its affinity when comparing — confirm against the schema.
            for gene_info in con.execute(query, batch).fetchall():
                gene = Gene()
                gene.load_attributes(gene_info)
                gene_map[gene.gene_id] = gene

    return [gene_map.get(gid, None) if gid else None for gid in genes]
def _source_changed(self) -> None:
    """
    React to a change of the selected source: refresh its file and load
    the data into ``self.data``. Does nothing when no sources are available.
    """
    if not self.available_sources:
        return

    selected = self.available_sources[self.selected_source]
    file_name = selected['filename']

    try:
        serverfiles.update(SERVER_FILES_DOMAIN, file_name)
    except requests.exceptions.ConnectionError:
        # try to update file. Ignore network errors.
        pass

    try:
        file_path = serverfiles.localpath_download(SERVER_FILES_DOMAIN, file_name)
    except requests.exceptions.ConnectionError:
        # Unexpected error.
        raise

    self.data = Table.from_file(file_path)
def load_gene_sets(hierarchy, tax_id):
    # type: (Tuple[str, str], str) -> GeneSets
    """ Initialize gene sets from a given hierarchy.

    :param tuple hierarchy: gene set hierarchy.
    :param str tax_id: taxonomy id used, together with the hierarchy, to
        build the gene set file name.
    :rtype: :obj:`GeneSets`

    Example
    --------
    Gene sets provided with Orange are organized hierarchically:

        >>> list_of_genesets = list_all(organism='10090')
            [(('KEGG', 'Pathways'), '10090'),
             (('KEGG', 'pathways'), '10090'),
             (('GO', 'biological_process'), '10090'),
             (('GO', 'molecular_function'), '10090'),
             (('GO', 'cellular_component'), '10090')]
        >>> hierarchy, tax_id = list_of_genesets[0]
        >>> load_gene_sets(hierarchy, tax_id)

    """
    gmt_name = filename(hierarchy, tax_id)
    gmt_path = serverfiles.localpath_download(DOMAIN, gmt_name)
    gene_sets = GeneSets.from_gmt_file_format(gmt_path)
    return gene_sets
import bz2 import pickle from collections import defaultdict from server_update import * from server_update.tests.test_GeneInfo import GeneInfo from orangecontrib.bioinformatics.ncbi.gene import ( DOMAIN, FILENAME, gene_matcher_tuple, MATCHER_FILENAME, MATCHER_TITLE, MATCHER_TAGS, MAP_GENE_IDS, MAP_SOURCES, MAP_SYMBOLS, MAP_SYNONYMS, MAP_LOCUS) from orangecontrib.bioinformatics.ncbi.gene.utils import parse_sources, parse_synonyms, GeneInfoDB from orangecontrib.bioinformatics.ncbi.taxonomy import common_taxids, common_taxid_to_name from orangecontrib.bioinformatics.utils import serverfiles serverfiles.localpath_download(DOMAIN, FILENAME) tax_id, gene_id, symbol, synonyms, source, locus_tag = 0, 1, 2, 3, 4, 5 domain_path = sf_local.localpath(DOMAIN) temp_path = os.path.join(domain_path, sf_temp) db_path = os.path.join(domain_path, FILENAME) create_folder(temp_path) create_folder(domain_path) def parse_gene_record(parent_tax, mapper, gene_record): gene = gene_matcher_tuple(parent_tax, gene_record[gene_id],
def load_matcher_file(cls, domain, filename):
    """Return the unpickled content of a matcher file.

    :param domain: serverfiles domain of the matcher file.
    :param filename: name of the matcher file.
    """
    # Resolving the path starts a download when the file is not on the
    # local machine.
    local_copy = serverfiles.localpath_download(domain, filename)

    with open(local_copy, 'rb') as stream:
        matcher = pickle.load(stream)
    return matcher
def load_gene_sets(hierarchy):
    """Load the gene sets stored in the file that ``hierarchy`` identifies.

    :param hierarchy: sequence unpacked into the arguments of ``filename``.
    :return: the result of ``GeneSets.from_gmt_file_format`` for that file.
    """
    gmt_name = filename(*hierarchy)
    gmt_path = serverfiles.localpath_download(DOMAIN, gmt_name)
    return GeneSets.from_gmt_file_format(gmt_path)
def ensure_downloaded(domain, filename, advance=None):
    """Resolve a server file's local path; the resolved path is discarded.

    :param domain: serverfiles domain of the file.
    :param filename: name of the file.
    :param advance: passed to the download call as ``callback``.
    """
    download_options = {"callback": advance}
    serverfiles.localpath_download(domain, filename, **download_options)
def _gene_db_path(self):
    """Return the local path of the ``<tax_id>.sqlite`` gene database."""
    database_name = f'{self.tax_id}.sqlite'
    local_path = serverfiles.localpath_download(DOMAIN, database_name)
    return local_path
def onDeleteWidget(self):
    """Shut the widget down, then run the parent class's delete handling."""
    self.shutdown()
    super().onDeleteWidget()


if __name__ == "__main__":
    from Orange.projection import PCA
    from orangecontrib.bioinformatics.utils import serverfiles

    # Manual preview: every second row serves as reference data, extended
    # with a 2-component PCA projection stored as extra meta columns.
    expression = Table("https://datasets.orange.biolab.si/sc/aml-1k.tab.gz")
    expression.attributes[TAX_ID] = "9606"

    reference = expression[::2]
    projection = PCA(n_components=2)(reference)(reference)
    extended_domain = Domain(
        reference.domain.attributes,
        reference.domain.class_vars,
        chain(reference.domain.metas, projection.domain.attributes),
    )
    reference = reference.transform(extended_domain)

    # Marker genes restricted to rows whose "Organism" equals "Human".
    markers_path = serverfiles.localpath_download("marker_genes", "panglao_gene_markers.tab")
    human_only = Values([FilterString("Organism", FilterString.Equal, "Human")])
    marker_genes = human_only(Table(markers_path))
    marker_genes.attributes[TAX_ID] = "9606"

    WidgetPreview(OWAnnotateProjection).run(
        set_data=reference,
        set_secondary_data=expression[1:200:2],
        set_genes=marker_genes,
    )