def fetch_ncbi_geneinfo(taxid, progress=None):
    """Ensure the NCBI gene-info database for *taxid* is cached locally
    and return an opened :class:`gene.NCBIGeneInfo` for it.

    *progress* is an optional callback forwarded to the downloader.
    """
    # Normalize aliased taxonomy ids to their canonical mapping first.
    mapped = gene.NCBIGeneInfo.TAX_MAP.get(taxid, taxid)
    filename = "gene_info.{taxid}.db".format(taxid=mapped)
    sf.localpath_download(
        "NCBI_geneinfo", filename, callback=progress, verbose=True
    )
    return gene.NCBIGeneInfo(mapped)
def __init__(self, taxid=None, database=None):
    """Open a STRING protein-links sqlite database.

    At most one of *taxid* and *database* may be supplied.  *database*
    may be either an already-open :class:`sqlite3.Connection` or a path
    to the sqlite file; *taxid* triggers a serverfiles download.  With
    neither argument the legacy "PPI" server file is used.
    """
    if taxid is not None and database is not None:
        raise ValueError("taxid and database parameters are exclusive.")

    self.db = None
    if database is not None:
        # Caller supplied the database directly (connection or path).
        if isinstance(database, sqlite3.Connection):
            self.db = database
            self.filename = None
        else:
            self.filename = database
            self.db = sqlite3.connect(database)
    elif taxid is not None:
        self.filename = serverfiles.localpath_download(
            self.DOMAIN, self.FILENAME.format(taxid=taxid)
        )
    else:
        # Back compatibility
        self.filename = serverfiles.localpath_download(
            "PPI", "string-protein.sqlite")

    if self.db is None:
        self.db = sqlite3.connect(self.filename)
def __init__(self): self.filename = serverfiles.localpath_download( self.DOMAIN, self.SERVER_FILE) # assert version matches self.db = sqlite3.connect(self.filename) self.init_db_index()
def load(self):
    """Parse the cached ``BIOGRID-ALL.tab`` file and build the indexes.

    Populates:

    - ``self.interactions`` -- one ``BioGRIDInteraction`` per data line;
    - ``self.protein_names`` -- maps each (case-normalized) official
      symbol/alias to the set of interactor ids it refers to;
    - ``self.protein_interactions`` -- maps each interactor id to the
      set of interactions it participates in.

    NOTE(review): the file is opened in binary mode but split with str
    literals -- this code targets Python 2 (``urllib2`` is used
    elsewhere in the file); confirm before porting.
    """
    # Fix: the original leaked the file handle; close it deterministically.
    with open(serverfiles.localpath_download("PPI", "BIOGRID-ALL.tab"),
              "rb") as f:
        text = f.read()
    # Drop everything up to and including the header line.
    text = text.split("SOURCE\tPUBMED_ID\tORGANISM_A_ID\tORGANISM_B_ID\n",
                      1)[-1]
    self.interactions = [BioGRIDInteraction(line)
                         for line in text.splitlines() if line.strip()]
    self.protein_interactions = defaultdict(set)
    self.protein_names = {}
    case = self._case

    def update(keys, value, collection):
        # Register `value` under every alias in `keys`.
        for k in keys:
            collection.setdefault(k, set()).add(value)

    for inter in self.interactions:
        update(map(case, [inter.official_symbol_a]
                   + inter.aliases_for_a.split("|")),
               case(inter.interactor_a), self.protein_names)
        update(map(case, [inter.official_symbol_b]
                   + inter.aliases_for_b.split("|")),
               case(inter.interactor_b), self.protein_names)
        self.protein_interactions[case(inter.interactor_a)].add(inter)
        self.protein_interactions[case(inter.interactor_b)].add(inter)

    self.protein_interactions = dict(self.protein_interactions)
    # "N/A" is BioGRID's placeholder for a missing alias, not a protein.
    if case("N/A") in self.protein_names:
        del self.protein_names[case("N/A")]
def __init__(self, local_database_path=None):
    """Load the pickled mutants database.

    *local_database_path* overrides ``self.DEFAULT_DATABASE_PATH``; the
    directory is created if missing.  ``domain`` and ``pickle_file`` are
    free names here -- presumably module-level constants; verify against
    the enclosing module.
    """
    self.local_database_path = (
        local_database_path if local_database_path is not None
        else self.DEFAULT_DATABASE_PATH)
    if not os.path.exists(self.local_database_path):
        os.mkdir(self.local_database_path)
    # Fix: the original leaked the pickle file handle; close it via `with`.
    with open(localpath_download(domain, pickle_file), "rb") as f:
        self._mutants = pickle.load(f)
def __init__(self, local_database_path=None):
    """Load the pickled mutants database.

    *local_database_path* overrides ``self.DEFAULT_DATABASE_PATH``; the
    directory is created if missing.  ``domain`` and ``pickle_file`` are
    free names here -- presumably module-level constants; verify against
    the enclosing module.
    """
    self.local_database_path = (
        local_database_path if local_database_path is not None
        else self.DEFAULT_DATABASE_PATH)
    if not os.path.exists(self.local_database_path):
        os.mkdir(self.local_database_path)
    # Fix: the original leaked the pickle file handle; close it via `with`.
    with open(localpath_download(domain, pickle_file), "rb") as f:
        self._mutants = pickle.load(f)
def fetch_ppidb(ppisource, taxid, progress=None):
    """Download the PPI database described by *ppisource* and construct it.

    When the source filename is taxonomy-specific (contains a
    ``{taxid}`` placeholder) the taxid is first remapped through
    ``ppisource.tax_mapping``; a mapping of ``None`` marks an
    unsupported organism and raises :class:`ValueError`.
    """
    filename = ppisource.sf_filename

    if "{taxid}" not in filename:
        # Organism-independent database: construct with no arguments.
        make = ppisource.constructor
    else:
        if taxid in ppisource.tax_mapping:
            remapped = ppisource.tax_mapping[taxid]
            if remapped is None:
                raise ValueError(taxid)
            taxid = remapped
        filename = filename.format(taxid=taxid)
        resolved_taxid = taxid

        def make():
            return ppisource.constructor(resolved_taxid)

    sf.localpath_download(
        ppisource.sf_domain, filename, callback=progress, verbose=True
    )
    return make()
def ef_ontology():
    """ Return the `EF <http://www.ebi.ac.uk/efo/>`_ (Experimental Factor) ontology
    """
    from . import obiOntology
    from Orange.utils import serverfiles
    # Should this be in the OBOFoundry (Ontology) domain
    try:
        # Prefer the locally cached server file.
        stream = open(
            serverfiles.localpath_download("ArrayExpress", "efo.obo"), "rb")
    except urllib2.HTTPError:
        # Download failed -- fall back to reading straight from upstream.
        stream = urllib2.urlopen("http://efo.svn.sourceforge.net/svnroot/efo/trunk/src/efoinobo/efo.obo")
    return obiOntology.OBOOntology(stream)
def __init__(self, taxid=None, database=None, detailed_database=None):
    """Open the detailed STRING database and attach the links database.

    At most one of *taxid* and *detailed_database* may be supplied; with
    neither, the legacy "PPI" server file is used.  The plain links
    database is attached under the ``string`` schema name.
    """
    STRING.__init__(self, taxid, database)
    if taxid is not None and detailed_database is not None:
        raise ValueError("taxid and detailed_database are exclusive")

    db_file = serverfiles.localpath(self.DOMAIN, self.FILENAME)

    # Resolve the detailed database path unless the caller provided one.
    if detailed_database is None:
        if taxid is not None:
            detailed_database = serverfiles.localpath_download(
                self.DOMAIN, self.FILENAME_DETAILED.format(taxid=taxid)
            )
        else:
            # Back compatibility
            detailed_database = serverfiles.localpath_download(
                "PPI", "string-protein-detailed.sqlite")

    self.db_detailed = sqlite3.connect(detailed_database)
    self.db_detailed.execute("ATTACH DATABASE ? as string", (db_file,))
def ef_ontology():
    """ Return the `EF <http://www.ebi.ac.uk/efo/>`_ (Experimental Factor) ontology
    """
    from . import obiOntology
    from Orange.utils import serverfiles
    # Should this be in the OBOFoundry (Ontology) domain
    try:
        # Prefer the locally cached server file.
        stream = open(
            serverfiles.localpath_download("ArrayExpress", "efo.obo"), "rb")
    except urllib2.HTTPError:
        # Download failed -- fall back to reading straight from upstream.
        stream = urllib2.urlopen(
            "http://efo.svn.sourceforge.net/svnroot/efo/trunk/src/efoinobo/efo.obo"
        )
    return obiOntology.OBOOntology(stream)
def load(self):
    """Parse the cached ``allppis.xml`` PPI file and build the
    interaction and name indexes on this instance.

    Populates ``self.interactions``, ``self.protein_names`` and
    ``self.protein_interactions``.  NOTE(review): ``self.protein_names``
    starts out as a defaultdict of alias *sets* (filled inside
    ``process``) but is then overwritten per-protein with a single
    primary-ref id string in the final loop -- looks suspicious; confirm
    which representation callers expect.
    """
    self.protein_names = defaultdict(set)
    self.refs = {}
    # NOTE: spelled "confidance" in the original; kept for compatibility.
    self.confidance = {}

    def process(element):
        # Build one Interaction from an <interaction> XML element.
        d = {}  # NOTE(review): unused local, kept byte-for-byte.
        participants = element.getElementsByTagName("proteinParticipant")
        proteins = []
        for protein in participants:
            interactor = protein.getElementsByTagName("proteinInteractor")[0]
            # Collect (tagName, text) pairs: shortLabel first, then fullName.
            names = []
            for name in interactor.getElementsByTagName("shortLabel") + \
                    interactor.getElementsByTagName("fullName"):
                names.append((name.tagName, name.childNodes[0].data))
            # Primary cross-references as (tagName, attribute-items) pairs.
            refs = []
            for ref in interactor.getElementsByTagName("primaryRef"):
                refs += [(ref.tagName, ref.attributes.items())]
            org = dict(interactor.getElementsByTagName("organism")[0].attributes.items()).get("ncbiTaxId")
            conf = protein.getElementsByTagName("confidence")[0].attributes.items()
            proteins.append((names, refs, conf, org))
        # proteins[i][0][1][1] is the second name's text -- presumably the
        # fullName entry; assumes exactly two participants per interaction.
        interaction = Interaction(proteins[0][0][1][1], proteins[1][0][1][1])
        interaction.ref1, interaction.ref2 = proteins[0][1], proteins[1][1]
        interaction.conf1, interaction.conf2 = proteins[0][2], proteins[1][2]
        interaction.org1, interaction.org2 = proteins[0][3], proteins[1][3]
        # Record each participant's shortLabel under its protein key.
        self.protein_names[interaction.protein1].add(proteins[0][0][0][1])
        self.protein_names[interaction.protein2].add(proteins[1][0][0][1])
        return interaction

    document = minidom.parse(
        serverfiles.localpath_download("PPI", "allppis.xml"))
    interactions = document.getElementsByTagName("interaction")
    self.interactions = [process(interaction)
                         for interaction in interactions]
    self.protein_interactions = defaultdict(set)
    for inter in self.interactions:
        # Overwrites the alias sets built above with the primary-ref "id"
        # attribute -- see NOTE(review) in the docstring.
        self.protein_names[inter.protein1] = dict(inter.ref1[0][1]).get("id")
        self.protein_names[inter.protein2] = dict(inter.ref2[0][1]).get("id")
        self.protein_interactions[inter.protein1].add(inter)
        self.protein_interactions[inter.protein2].add(inter)
def ensure_downloaded(domain, filename, advance=None):
    """Ensure *filename* from server-files *domain* is in the local cache.

    *advance* is an optional progress callback forwarded to
    ``serverfiles.localpath_download``.  Only the download side effect is
    wanted here; the local path returned by the helper is discarded.
    """
    serverfiles.localpath_download(domain, filename, callback=advance)
def ensure_downloaded(domain, filename, advance=None):
    """Ensure *filename* from server-files *domain* is in the local cache.

    *advance* is an optional progress callback forwarded to
    ``serverfiles.localpath_download``.  Only the download side effect is
    wanted here; the local path returned by the helper is discarded.
    """
    serverfiles.localpath_download(domain, filename, callback=advance)