Exemple #1
0
    def download_data(cls, taxid=None, progress_callback=None):
        """ Download the data for ``taxid`` from the GeneMANIA
        website and initialize the local database.
        
        """
        import tarfile

        baseurl = "http://genemania.org/data/current/"
        directory = orngServerFiles.localpath("PPI")

        if not os.path.exists(directory):
            os.makedirs(directory)

        if taxid is None:
            taxid = cls.common_taxids()

        if isinstance(taxid, (list, tuple)):
            taxids = taxid
        else:
            taxids = [taxid]
        for taxid in taxids:
            name = obiTaxonomy.name(taxid)
            name = name.replace(" ", "_")

            if progress_callback is None:
                progress = True  #orngServerFiles.ConsoleProgressBar("Downloading %r." % filename)
            else:
                progress = progress_callback

            filename = name + ".tgz"
            url = baseurl + "networks/" + filename
            wget(url, directory=directory, progress=progress)

            tgz_filename = os.path.join(directory, filename)
            tgz = tarfile.open(tgz_filename)
            tgz.extractall(directory)

            filename = name + ".COMBINED.tgz"
            url = baseurl + "precombined/" + filename
            wget(url, directory=directory, progress=progress)

            tgz_filename = os.path.join(directory, filename)
            tgz = tarfile.open(tgz_filename)
            tgz.extractall(directory)

            cls.init_db([taxid])
    def download_data(cls, taxid=None, progress_callback=None):
        """ Download the data for ``taxid`` from the GeneMANIA
        website and initialize the local database.
        
        """
        import tarfile
        
        baseurl = "http://genemania.org/data/current/"
        directory = orngServerFiles.localpath("PPI")

        if not os.path.exists(directory):
            os.makedirs(directory)

        if taxid is None:
            taxid = cls.common_taxids()
        
        if isinstance(taxid, (list, tuple)):
            taxids = taxid
        else:
            taxids = [taxid]
        for taxid in taxids:
            name = obiTaxonomy.name(taxid)
            name = name.replace(" ", "_")
            
            if progress_callback is None:
                progress = True #orngServerFiles.ConsoleProgressBar("Downloading %r." % filename)
            else:
                progress = progress_callback
            
            filename = name + ".tgz"
            url = baseurl + "networks/" + filename    
            wget(url, directory=directory, progress=progress)
            
            tgz_filename = os.path.join(directory, filename)    
            tgz = tarfile.open(tgz_filename)
            tgz.extractall(directory)
            
            filename = name + ".COMBINED.tgz"
            url = baseurl + "precombined/" + filename
            wget(url, directory=directory, progress=progress)
            
            tgz_filename = os.path.join(directory, filename)
            tgz = tarfile.open(tgz_filename)
            tgz.extractall(directory)
        
            cls.init_db([taxid])
Exemple #3
0
        def download(filename, url):
            with open(pjoin(cache_dir, filename + ".tmp"), "wb") as dest:
                wget(url, dst_obj=dest, progress=True)

            shutil.move(pjoin(cache_dir, filename + ".tmp"),
                        pjoin(cache_dir, filename))
Exemple #4
0
    def init_db(cls, version, taxid, cache_dir=None, dbfilename=None):
        if cache_dir is None:
            cache_dir = serverfiles.localpath(cls.DOMAIN)
        if dbfilename is None:
            dbfilename = serverfiles.localpath(
                cls.DOMAIN,
                "string-protein-detailed.{taxid}.sqlite".format(taxid=taxid)
            )

        pjoin = os.path.join

        base_url = "http://string-db.org/newstring_download/"
        filename = "{taxid}.protein.links.detailed.{version}.txt.gz"
        filename = filename.format(version=version, taxid=taxid)
        url = base_url + "protein.links.detailed.{version}/" + filename
        url = url.format(version=version)

        if not os.path.exists(pjoin(cache_dir, filename)):
            wget(url, cache_dir, progress=True)

        links_fileobj = open(pjoin(cache_dir, filename), "rb")
        links_file = gzip.GzipFile(fileobj=links_fileobj)

        con = sqlite3.connect(dbfilename)
        with con:
            con.execute("""
                DROP TABLE IF EXISTS evidence
            """)

            con.execute("""
                CREATE TABLE evidence(
                     protein_id1 TEXT,
                     protein_id2 TEXT,
                     neighborhood INTEGER,
                     fusion INTEGER,
                     cooccurence INTEGER,
                     coexpression INTEGER,
                     experimental INTEGER,
                     database INTEGER,
                     textmining INTEGER
                    )
                """)

            links = csv.reader(links_file, delimiter=" ")
            links.next()  # Read header
            filesize = os.stat(pjoin(cache_dir, filename)).st_size

            progress = ConsoleProgressBar("Processing links file:")
            progress(1.0)

            def read_links(reader):
                for i, (p1, p2, n, f, c, cx, ex, db, t, _) in \
                        enumerate(reader):
                    yield p1, p2, n, f, c, cx, ex, db, t

                    if i % 10000 == 0:
                        progress(100.0 * links_fileobj.tell() / filesize)

            con.executemany("""
                INSERT INTO evidence
                VALUES  (?, ?, ?, ?, ?, ?, ?, ?, ?)
                """, read_links(links))

            progress.finish()

            print("Indexing")
            con.execute("""\
                CREATE INDEX IF NOT EXISTS index_evidence
                    ON evidence (protein_id1, protein_id2)
            """)

            con.executescript("""
                DROP TABLE IF EXISTS version;

                CREATE TABLE version (
                     string_version text,
                     api_version text
                );
                """)

            con.execute("""
                INSERT INTO version
                VALUES (?, ?)""", (version, cls.VERSION))