def wget(url, directory=".", dst_obj=None, progress=None):
    """
    Download `url` and write its contents to a file.

    :param url: Address to retrieve (opened with ``urllib2.urlopen``).
    :param directory: Directory in which a file named after the last path
        component of `url` is created; only used when `dst_obj` is None.
    :param dst_obj: Optional writable file-like object receiving the data.
        It is left open; closing it stays the caller's responsibility.
    :param progress: ``True`` to report on a ``ConsoleProgressBar``;
        any other value is passed through unchanged to ``copyfileobj``.

    """
    stream = urllib2.urlopen(url)
    # Only a destination opened here is closed here.
    own_dst = False
    try:
        length = stream.headers.get("content-length", None)
        # Without a Content-Length header the total size is unknown.
        length = sys.maxint if length is None else int(length)

        basename = posixpath.basename(url)

        if dst_obj is None:
            dst_obj = open(os.path.join(directory, basename), "wb")
            own_dst = True

        # Equality test (not truthiness) on purpose: every other value,
        # e.g. a caller supplied progress object, goes to the else branch
        # and is handed to `copyfileobj` as is.
        if progress == True:
            from Orange.utils import ConsoleProgressBar
            progress = ConsoleProgressBar("Downloading %r." % basename)
            with finishing(progress):
                copyfileobj(stream,
                            dst_obj,
                            buffer=2**10,
                            content_len=length,
                            progress=progress)
        else:
            copyfileobj(stream,
                        dst_obj,
                        buffer=2**10,
                        content_len=length,
                        progress=progress)
    finally:
        # Release the file (if ours) and the network stream even when the
        # transfer fails part way through.
        try:
            if own_dst:
                dst_obj.close()
        finally:
            stream.close()
Example #2
0
 def getstring(self):
     """
     Return the base progress bar string extended with size, speed and ETA.

     The speed is estimated from the completed percentage
     (``self.state``) of ``self.size`` over the time elapsed since
     ``self.starttime``; the ETA is the remaining part divided by it.
     """
     # Clamp both divisors: right after the start the elapsed time can be
     # 0 and, with nothing transferred yet, the integer speed is 0 too.
     elapsed = max(time.time() - self.starttime, 0.1)
     speed = max(int(self.state * self.size / 100.0 / elapsed), 1)
     eta = (100 - self.state) * self.size / 100.0 / speed
     return ConsoleProgressBar.getstring(
         self) + "  %s  %12s/s  %3i:%02i ETA" % (self.sizeof_fmt(
             self.size), self.sizeof_fmt(speed), eta / 60, eta % 60)
Example #3
0
 def getstring(self):
     """
     Return the base bar string followed by total size, rate and ETA.

     The rate is derived from ``self.state`` percent of ``self.size``
     over the time passed since ``self.starttime``.
     """
     # Lower bounds keep the two divisions below away from zero.
     seconds_running = max(time.time() - self.starttime, 0.1)
     transferred = self.state * self.size / 100.0
     rate = max(int(transferred / seconds_running), 1)

     outstanding = (100 - self.state) * self.size / 100.0
     remaining = outstanding / rate

     bar = ConsoleProgressBar.getstring(self)
     details = "  %s  %12s/s  %3i:%02i ETA" % (
         self.sizeof_fmt(self.size),
         self.sizeof_fmt(rate),
         remaining / 60,
         remaining % 60,
     )
     return bar + details
Example #4
0
 def getstring(self):
     """Compose the status line: base bar, file size, speed and ETA."""
     size = self.size
     percent_done = self.state

     # At least 0.1 s elapsed and at least 1 unit/s, so neither quotient
     # can hit a zero divisor.
     since_start = max(time.time() - self.starttime, 0.1)
     per_second = max(int(percent_done * size / 100.0 / since_start), 1)
     time_left = (100 - percent_done) * size / 100.0 / per_second

     head = ConsoleProgressBar.getstring(self)
     fields = (self.sizeof_fmt(size), self.sizeof_fmt(per_second),
               time_left / 60, time_left % 60)
     return head + "  %s  %12s/s  %3i:%02i ETA" % fields
Example #5
0
 def __call__(self, *args, **kwargs):
     """
     Delegate to ``ConsoleProgressBar.__call__`` and, when ``self.redirect``
     is set, call it with the current ``self.state``.

     Returns whatever the base class call returned.
     """
     result = ConsoleProgressBar.__call__(self, *args, **kwargs)
     if not self.redirect:
         return result
     self.redirect(self.state)
     return result
Example #6
0
 def __init__(self, filename, size):
     """
     Announce the download and initialise the bar and its bookkeeping.

     :param filename: Name shown in the "Downloading ..." message.
     :param size: Stored as ``self.size`` (presumably the total size in
         bytes -- confirm against the caller).
     """
     # A single pre-formatted argument prints the same text whether
     # `print` is the Python 2 statement or the Python 3 function.
     print("Downloading %s" % (filename,))
     ConsoleProgressBar.__init__(self, "progress:", 20)
     self.size = size
     self.starttime = time.time()
     self.speed = 0.0
Example #7
0
 def __call__(self, *args, **kwargs):
     """
     Forward the call to ``ConsoleProgressBar.__call__`` and hand the
     resulting ``self.state`` to ``self.redirect`` if one is set.

     The base class return value is passed back to the caller.
     """
     outcome = ConsoleProgressBar.__call__(self, *args, **kwargs)
     # A falsy `redirect` means there is nobody to notify.
     if self.redirect:
         self.redirect(self.state)
     return outcome
Example #8
0
 def __init__(self, filename, size):
     """
     Print a "Downloading" notice and set up the bar's initial state.

     :param filename: Name printed in the notice.
     :param size: Value stored as ``self.size``.
     """
     print("Downloading", filename)
     ConsoleProgressBar.__init__(self, "progress:", 20)
     # Bookkeeping set after the base initialiser has run.
     self.size = size
     self.starttime, self.speed = time.time(), 0.0
Example #9
0
    def init_db(cls, version, taxid, cache_dir=None, dbfilename=None):
        """
        Download the STRING flat files and (re)build the sqlite database.

        The gzipped ``protein.links``, ``protein.actions`` and
        ``protein.aliases`` files for `taxid` and `version` are fetched
        into `cache_dir` (only if not already present there) and loaded
        into the ``links``, ``actions`` and ``aliases`` tables. The
        ``proteins`` table is filled from the distinct ``protein_id1``
        values in ``links``; finally the indices are created and a
        ``version`` table holding `version` and ``cls.VERSION`` is written.

        :param version: Release identifier used in the download urls.
        :param taxid: Organism identifier used in the flat file names.
        :param cache_dir: Directory for the downloaded files; defaults to
            ``serverfiles.localpath(cls.DOMAIN)``.
        :param dbfilename: Path of the sqlite database; defaults to
            ``cls.default_db_filename(taxid)``.

        """
        from contextlib import closing

        if cache_dir is None:
            cache_dir = serverfiles.localpath(cls.DOMAIN)

        if dbfilename is None:
            dbfilename = cls.default_db_filename(taxid)

        pjoin = os.path.join

        base_url = "http://string-db.org/newstring_download/"

        def paths(flatfile):
            # Return the (local file name, download url) pair of a flat file.
            url = "{flatfile}.{version}/{taxid}.{flatfile}.{version}.txt.gz"
            url = url.format(flatfile=flatfile, version=version, taxid=taxid)
            return posixpath.basename(url), base_url + url

        links_filename, links_url = paths("protein.links")

        actions_filename, actions_url = paths("protein.actions")

        aliases_filename, aliases_url = paths("protein.aliases")

        def download(filename, url):
            # Write to a '.tmp' name and move it into place only once the
            # transfer has finished, so the existence check below never
            # sees a partially downloaded file under the final name.
            with open(pjoin(cache_dir, filename + ".tmp"), "wb") as dest:
                wget(url, dst_obj=dest, progress=True)

            shutil.move(pjoin(cache_dir, filename + ".tmp"),
                        pjoin(cache_dir, filename))

        for fname, url in [(links_filename, links_url),
                           (actions_filename, actions_url),
                           (aliases_filename, aliases_url)]:
            if not os.path.exists(pjoin(cache_dir, fname)):
                download(fname, url)

        links_fileobj = open(pjoin(cache_dir, links_filename), "rb")
        actions_fileobj = open(pjoin(cache_dir, actions_filename), "rb")
        aliases_fileobj = open(pjoin(cache_dir, aliases_filename), "rb")

        links_file = gzip.GzipFile(fileobj=links_fileobj)
        actions_file = gzip.GzipFile(fileobj=actions_fileobj)
        aliases_file = gzip.GzipFile(fileobj=aliases_fileobj)

        progress = ConsoleProgressBar("Processing {}:".format(links_filename))
        progress(0.0)

        def st_size(filename):
            return os.stat(pjoin(cache_dir, filename)).st_size

        # Progress is measured as the position in the compressed file
        # relative to its size on disk.
        filesize = st_size(links_filename)

        con = sqlite3.connect(dbfilename)

        # Managers exit right to left: `con` commits (or rolls back on
        # error), `closing(con)` then closes the connection, and finally
        # the three raw file objects are closed.
        with links_fileobj, actions_fileobj, aliases_fileobj, \
                closing(con), con:
            cls.clear_db(con)

            links_file.readline()  # read the header line

            reader = csv.reader(links_file, delimiter=" ")

            def read_links(reader, progress):
                for i, (p1, p2, score) in enumerate(reader):
                    yield p1, p2, int(score)

                    if i % 100000 == 0:
                        # Update the progress every 100000 lines
                        progress(100.0 * links_fileobj.tell() / filesize)

            con.executemany("INSERT INTO links VALUES (?, ?, ?)",
                            read_links(reader, progress))

            progress.finish()

            def part(string, sep, part):
                return string.split(sep)[part]

            # Expose `part` to SQL; used below to take the text before
            # the first '.' of each protein id.
            con.create_function("part", 3, part)
            con.execute("""
                INSERT INTO proteins
                SELECT protein_id1, part(protein_id1, '.', 0)
                FROM (SELECT DISTINCT(protein_id1)
                     FROM links
                     ORDER BY protein_id1)
            """)

            filesize = st_size(actions_filename)

            actions_file.readline()  # read header line

            progress = ConsoleProgressBar("Processing actions:")
            reader = csv.reader(actions_file, delimiter="\t")

            def read_actions(reader):
                # The `a_is_acting` column is read but not stored.
                for i, (p1, p2, mode, action, a_is_acting, score) in \
                        enumerate(reader):
                    yield p1, p2, mode, action, int(score)

                    if i % 10000 == 0:
                        progress(100.0 * actions_fileobj.tell() / filesize)

            con.executemany("INSERT INTO actions VALUES (?, ?, ?, ?, ?)",
                            read_actions(reader))

            progress.finish()

            filesize = st_size(aliases_filename)
            aliases_file.readline()  # read header line

            progress = ConsoleProgressBar("Processing aliases:")

            reader = csv.reader(aliases_file, delimiter="\t")

            def read_aliases(reader, progress):
                # NOTE: `taxid` here is the row's first column and shadows
                # the method argument only inside this generator.
                for i, (taxid, name, alias, source) in enumerate(reader):
                    yield (".".join([taxid, name]),
                           alias.decode("utf-8", errors="ignore"),
                           source.decode("utf-8", errors="ignore"))
                    if i % 10000 == 0:
                        progress(100.0 * aliases_fileobj.tell() / filesize)

            con.executemany("INSERT INTO aliases VALUES (?, ?, ?)",
                            read_aliases(reader, progress))

            progress.finish()

            print("Indexing the database")
            cls.create_db_index(con)

            con.executescript("""
                DROP TABLE IF EXISTS version;
                CREATE TABLE version (
                     string_version text,
                     api_version text
                );""")

            con.execute("""
                INSERT INTO version
                VALUES (?, ?)""", (version, cls.VERSION))
Example #10
0
    def init_db(cls, version, taxid, cache_dir=None, dbfilename=None):
        """
        Download the detailed STRING links file and (re)build its database.

        The gzipped ``protein.links.detailed`` file for `taxid` and
        `version` is fetched into `cache_dir` (only if not already present
        there). The ``evidence`` table is dropped, recreated and filled
        from it, then indexed on ``(protein_id1, protein_id2)``, and a
        ``version`` table holding `version` and ``cls.VERSION`` is written.

        :param version: Release identifier used in the download url.
        :param taxid: Organism identifier used in the file names.
        :param cache_dir: Directory for the downloaded file; defaults to
            ``serverfiles.localpath(cls.DOMAIN)``.
        :param dbfilename: Path of the sqlite database; defaults to
            ``string-protein-detailed.{taxid}.sqlite`` under
            ``serverfiles.localpath(cls.DOMAIN, ...)``.

        """
        from contextlib import closing

        if cache_dir is None:
            cache_dir = serverfiles.localpath(cls.DOMAIN)
        if dbfilename is None:
            dbfilename = serverfiles.localpath(
                cls.DOMAIN,
                "string-protein-detailed.{taxid}.sqlite".format(taxid=taxid)
            )

        pjoin = os.path.join

        base_url = "http://string-db.org/newstring_download/"
        filename = "{taxid}.protein.links.detailed.{version}.txt.gz"
        filename = filename.format(version=version, taxid=taxid)
        url = base_url + "protein.links.detailed.{version}/" + filename
        url = url.format(version=version)

        if not os.path.exists(pjoin(cache_dir, filename)):
            wget(url, cache_dir, progress=True)

        links_fileobj = open(pjoin(cache_dir, filename), "rb")
        links_file = gzip.GzipFile(fileobj=links_fileobj)

        con = sqlite3.connect(dbfilename)
        # Managers exit right to left: `con` commits (or rolls back on
        # error), `closing(con)` then closes the connection, and finally
        # the raw file object is closed.
        with links_fileobj, closing(con), con:
            con.execute("""
                DROP TABLE IF EXISTS evidence
            """)

            con.execute("""
                CREATE TABLE evidence(
                     protein_id1 TEXT,
                     protein_id2 TEXT,
                     neighborhood INTEGER,
                     fusion INTEGER,
                     cooccurence INTEGER,
                     coexpression INTEGER,
                     experimental INTEGER,
                     database INTEGER,
                     textmining INTEGER
                    )
                """)

            links = csv.reader(links_file, delimiter=" ")
            # Builtin `next` instead of the Python 2 only `.next()` method.
            next(links)  # Read header
            # Progress is measured as the position in the compressed file
            # relative to its size on disk.
            filesize = os.stat(pjoin(cache_dir, filename)).st_size

            progress = ConsoleProgressBar("Processing links file:")
            progress(1.0)

            def read_links(reader):
                # The tenth (last) column of each row is not stored.
                for i, (p1, p2, n, f, c, cx, ex, db, t, _) in \
                        enumerate(reader):
                    yield p1, p2, n, f, c, cx, ex, db, t

                    if i % 10000 == 0:
                        progress(100.0 * links_fileobj.tell() / filesize)

            con.executemany("""
                INSERT INTO evidence
                VALUES  (?, ?, ?, ?, ?, ?, ?, ?, ?)
                """, read_links(links))

            progress.finish()

            print("Indexing")
            con.execute("""\
                CREATE INDEX IF NOT EXISTS index_evidence
                    ON evidence (protein_id1, protein_id2)
            """)

            con.executescript("""
                DROP TABLE IF EXISTS version;

                CREATE TABLE version (
                     string_version text,
                     api_version text
                );
                """)

            con.execute("""
                INSERT INTO version
                VALUES (?, ?)""", (version, cls.VERSION))
Example #11
0
 def getstring(self):
     """
     Return the base progress bar string extended with size, speed and ETA.

     The speed is estimated from ``self.state`` percent of ``self.size``
     over the time elapsed since ``self.starttime``.
     """
     # Guard both divisions: the elapsed time can be 0 on the very first
     # call, and the truncated speed is 0 while nothing has been received.
     elapsed = max(time.time() - self.starttime, 0.1)
     speed = max(int(self.state * self.size / 100.0 / elapsed), 1)
     eta = (100 - self.state) * self.size / 100.0 / speed
     return ConsoleProgressBar.getstring(self) + \
         "  %s  %12s/s  %3i:%02i ETA" % (self.sizeof_fmt(self.size),
                                         self.sizeof_fmt(speed),
                                         eta / 60, eta % 60)