Example #1
    def _copy_chunks(self):
        init = fs_encode(self.info.get_chunk_name(0))  #: initial chunk name

        if self.info.get_count() > 1:
            with open(init, "rb+") as fo:  #: first chunkfile
                for i in xrange(1, self.info.get_count()):
                    # input file
                    fo.seek(
                        self.info.get_chunk_range(i - 1)[1] + 1)  #: seek to beginning of chunk, to get rid of overlapping chunks
                    fname = fs_encode("%s.chunk%d" % (self.filename, i))
                    with open(fname, "rb") as fi:
                        buf = 32 * 1024
                        while True:  #: copy in chunks, consumes less memory
                            data = fi.read(buf)
                            if not data:
                                break
                            fo.write(data)
                    if fo.tell() < self.info.get_chunk_range(i)[1]:
                        os.remove(init)
                        self.info.remove()  #: there are probably invalid chunks
                        raise Exception("Downloaded content was smaller than expected. Try to reduce download connections.")
                    os.remove(fname)  #: remove chunk

        if self.nameDisposition and self.disposition:
            self.filename = fs_join(os.path.dirname(self.filename), self.nameDisposition)

        shutil.move(init, fs_encode(self.filename))
        self.info.remove()  #: remove info file
Example #2
def get_download(path):
    path = urllib.unquote(decode(path))
    #@TODO some files can not be downloaded

    root = API.get_config_value("general", "download_folder")

    path = path.replace("..", "")
    return bottle.static_file(fs_encode(path), fs_encode(root))
Example #3
    @staticmethod
    def load(name):
        fs_name = fs_encode("%s.chunks" % name)
        if not os.path.exists(fs_name):
            raise IOError()
        fh = codecs.open(fs_name, "r", "utf_8")
        name = fh.readline()[:-1]
        size = fh.readline()[:-1]
        if name.startswith("name:") and size.startswith("size:"):
            name = name[5:]
            size = size[5:]
        else:
            fh.close()
            raise WrongFormat()
        ci = ChunkInfo(name)
        ci.loaded = True
        ci.set_size(size)
        while True:
            if not fh.readline():  #: skip line
                break
            name = fh.readline()[1:-1]
            range = fh.readline()[1:-1]
            if name.startswith("name:") and range.startswith("range:"):
                name = name[5:]
                range = range[6:].split("-")
            else:
                fh.close()
                raise WrongFormat()

            ci.add_chunk(name, (long(range[0]), long(range[1])))
        fh.close()
        return ci
Example #4
def downloads():
    root = API.get_config_value("general", "download_folder")

    if not os.path.isdir(root):
        return base([_('Download directory not found.')])
    data = {
        'folder': [],
        'files': []
    }

    items = os.listdir(fs_encode(root))

    for item in sorted([fs_decode(x) for x in items]):
        if os.path.isdir(fs_join(root, item)):
            folder = {
                'name': item,
                'path': item,
                'files': []
            }
            files = os.listdir(fs_join(root, item))
            for file in sorted([fs_decode(x) for x in files]):
                try:
                    if os.path.isfile(fs_join(root, item, file)):
                        folder['files'].append(file)
                except Exception:
                    pass

            data['folder'].append(folder)
        elif os.path.isfile(fs_join(root, item)):
            data['files'].append(item)

    return render_to_response('downloads.html', {'files': data}, [pre_processor])
Example #5
    def save(self):
        fs_name = fs_encode("%s.chunks" % self.name)
        fh = codecs.open(fs_name, "w", "utf_8")
        fh.write("name:%s\n" % self.name)
        fh.write("size:%s\n" % self.size)
        for i, c in enumerate(self.chunks):
            fh.write("#%d:\n" % i)
            fh.write("\tname:%s\n" % c[0])
            fh.write("\trange:%i-%i\n" % c[1])
        fh.close()
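For reference, the save()/load() pair above implies a plain-text layout like this (illustrative name, size, and ranges; the per-chunk lines are prefixed with a single tab, which load() strips with its `[1:-1]` slice):

name:example.bin
size:2097152
#0:
	name:example.bin.chunk0
	range:0-1048575
#1:
	name:example.bin.chunk1
	range:1048576-2097151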
Example #6
    def get_handle(self):
        """Returns a Curl handle ready to use for perform/multiperform"""

        self.set_request_context(self.p.url, self.p.get, self.p.post, self.p.referer, self.p.cj)
        self.c.setopt(pycurl.WRITEFUNCTION, self.writeBody)
        self.c.setopt(pycurl.HEADERFUNCTION, self.writeHeader)

        # request all bytes, since some servers in Russia seem to have a defective arithmetic unit

        fs_name = fs_encode(self.p.info.get_chunk_name(self.id))
        if self.resume:
            self.fp = open(fs_name, "ab")
            self.arrived = self.fp.tell()
            if not self.arrived:
                self.arrived = os.stat(fs_name).st_size

            if self.range:
                # do nothing if chunk already finished
                if self.arrived + self.range[0] >= self.range[1]:
                    return None

                if self.id == len(self.p.info.chunks) - 1:  #: the last chunk gets no end range, so we receive everything that remains
                    range = "%i-" % (self.arrived + self.range[0])
                else:
                    range = "%i-%i" % (self.arrived + self.range[0], min(self.range[1] + 1, self.p.size - 1))

                self.log.debug("Chunked resume with range %s" % range)
                self.c.setopt(pycurl.RANGE, range)
            else:
                self.log.debug("Resume File from %i" % self.arrived)
                self.c.setopt(pycurl.RESUME_FROM, self.arrived)

        else:
            if self.range:
                if self.id == len(self.p.info.chunks) - 1:  #: see above
                    range = "%i-" % self.range[0]
                else:
                    range = "%i-%i" % (self.range[0], min(self.range[1] + 1, self.p.size - 1))

                self.log.debug("Chunked with range %s" % range)
                self.c.setopt(pycurl.RANGE, range)

            self.fp = open(fs_name, "wb")

        return self.c
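The resume and range handling above boils down to two libcurl options; a minimal stand-alone sketch (hypothetical URL and byte range, not pyLoad code) that fetches only the first KiB of a resource:

import pycurl
from io import BytesIO

buf = BytesIO()
c = pycurl.Curl()
c.setopt(pycurl.URL, "http://example.com/file.bin")  #: hypothetical URL
c.setopt(pycurl.RANGE, "0-1023")  #: inclusive byte range, the same option the chunks set
c.setopt(pycurl.WRITEFUNCTION, buf.write)  #: collect the body instead of writing a chunk file
c.perform()
c.close()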
Example #7
    def check_download(self, rules, api_size=0, max_size=50000, delete=True, read_size=0):
        """ checks the content of the last downloaded file, re match is saved to `lastCheck`

        :param rules: dict with names and rules to match (compiled regexp or strings)
        :param api_size: expected file size
        :param max_size: if the file is larger then it wont be checked
        :param delete: delete if matched
        :param read_size: amount of bytes to read from files larger then max_size
        :return: dictionary key of the first rule that matched
        """
        lastDownload = fs_encode(self.lastDownload)
        if not os.path.exists(lastDownload):
            return None

        size = os.stat(lastDownload).st_size

        if api_size and api_size <= size:
            return None
        elif size > max_size and not read_size:
            return None
        self.log_debug("Download Check triggered")

        with open(lastDownload, "rb") as f:
            content = f.read(read_size if read_size else -1)

        # produces encoding errors, better log to other file in the future?
        # self.log_debug("Content: %s" % content)
        for name, rule in rules.iteritems():
            if isinstance(rule, basestring):
                if rule in content:
                    if delete:
                        os.remove(lastDownload)
                    return name
            elif hasattr(rule, "search"):
                m = rule.search(content)
                if m:
                    if delete:
                        os.remove(lastDownload)
                    self.lastCheck = m
                    return name
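A hypothetical call, purely to illustrate the rule dict and the return value (the rule names and patterns below are made up, and `re` is assumed to be imported at module level):

check = self.check_download({
    "empty file": re.compile(r"\A\s*\Z"),
    "html page": "<html",  #: plain strings are matched as substrings of the content
    "error msg": re.compile(r"file (deleted|not found)", re.I),
})
if check is not None:
    self.log_debug("Check matched rule: %s" % check)  #: for regexp rules the match object is in self.lastCheck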
Example #8
    def remove(self):
        fs_name = fs_encode("%s.chunks" % self.name)
        if os.path.exists(fs_name):
            os.remove(fs_name)
Example #9
    def _download(self, chunks, resume):
        if not resume:
            self.info.clear()
            self.info.add_chunk("%s.chunk0" % self.filename, (0, 0))  #: create an initial entry
            self.info.save()

        self.chunks = []

        init = HTTPChunk(0, self, None, resume)  #: initial chunk that will load complete file (if needed)

        self.chunks.append(init)
        self.manager.add_handle(init.get_handle())

        lastFinishCheck = 0
        lastTimeCheck = 0
        chunksDone = set()  #: set of curl handles that are finished
        chunksCreated = False
        done = False
        if self.info.get_count() == 0:  #: this is a resume; if we were chunked originally, assume we still can be
            self.chunkSupport = False

        while 1:
            # need to create chunks
            if not chunksCreated and self.chunkSupport and self.size:  #: self.size will be set later by the first chunk

                if not resume:
                    self.info.set_size(self.size)
                    self.info.create_chunks(chunks)
                    self.info.save()

                chunks = self.info.get_count()

                init.setRange(self.info.get_chunk_range(0))

                for i in xrange(1, chunks):
                    c = HTTPChunk(i, self, self.info.get_chunk_range(i), resume)

                    handle = c.get_handle()
                    if handle:
                        self.chunks.append(c)
                        self.manager.add_handle(handle)
                    else:
                        # close immediately
                        self.log.debug("Invalid curl handle -> closed")
                        c.close()

                chunksCreated = True

            while 1:
                ret, num_handles = self.manager.perform()
                if ret != pycurl.E_CALL_MULTI_PERFORM:
                    break

            t = time.time()

            # reduce these calls
            while lastFinishCheck + 0.5 < t:
                # list of failed curl handles
                failed = []
                ex = None  #: save only last exception, we can only raise one anyway

                num_q, ok_list, err_list = self.manager.info_read()
                for c in ok_list:
                    chunk = self.find_chunk(c)
                    try:  #: check if the header implies success, else add it to failed list
                        chunk.verifyHeader()
                    except BadHeader, e:
                        self.log.debug("Chunk %d failed: %s" % (chunk.id + 1, str(e)))
                        failed.append(chunk)
                        ex = e
                    else:
                        chunksDone.add(c)

                for c in err_list:
                    curl, errno, msg = c
                    chunk = self.find_chunk(curl)
                    # test if chunk was finished
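                    # curl errno 23 is CURLE_WRITE_ERROR; a "0 != n" message here presumably
                    # means the write callback aborted because the chunk's range was already complete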
                    if errno != 23 or "0 !=" not in msg:
                        failed.append(chunk)
                        ex = pycurl.error(errno, msg)
                        self.log.debug("Chunk %d failed: %s" % (chunk.id + 1, str(ex)))
                        continue

                    try:  #: check if the header implies success, else add it to failed list
                        chunk.verifyHeader()
                    except BadHeader, e:
                        self.log.debug("Chunk %d failed: %s" % (chunk.id + 1, str(e)))
                        failed.append(chunk)
                        ex = e
                    else:
                        chunksDone.add(curl)
                if not num_q:  #: no more infos to get

                    # check if init is not finished so we reset download connections
                    # note that other chunks are closed and downloaded with init too
                    if failed and init not in failed and init.c not in chunksDone:
                        self.log.error(_("Download chunks failed, fallback to single connection | %s") % str(ex))

                        # list of chunks to clean and remove
                        to_clean = filter(lambda x: x is not init, self.chunks)
                        for chunk in to_clean:
                            self.close_chunk(chunk)
                            self.chunks.remove(chunk)
                            os.remove(fs_encode(self.info.get_chunk_name(chunk.id)))

                        # let first chunk load the rest and update the info file
                        init.resetRange()
                        self.info.clear()
                        self.info.add_chunk("%s.chunk0" % self.filename, (0, self.size))
                        self.info.save()
                    elif failed:
                        raise ex

                    lastFinishCheck = t

                    if len(chunksDone) >= len(self.chunks):
                        if len(chunksDone) > len(self.chunks):
                            self.log.warning("Finished download chunks size incorrect, please report bug.")
                        done = True  #: all chunks loaded

                    break
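Stripped of the chunk bookkeeping, the polling above is the standard pycurl multi-handle pattern; a minimal sketch with hypothetical handles:

import pycurl

m = pycurl.CurlMulti()
# m.add_handle(c) for each prepared pycurl.Curl handle (omitted here)

while True:
    ret, num_handles = m.perform()
    if ret != pycurl.E_CALL_MULTI_PERFORM:
        break

num_q, ok_list, err_list = m.info_read()  #: finished handles and (handle, errno, msg) failures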
Example #10
        try:
            newname = self.req.http_download(url, filename, get=get, post=post, ref=ref, cookies=cookies,
                                            chunks=self.get_chunk_count(), resume=self.resumeDownload,
                                            progressNotify=self.pyfile.setProgress, disposition=disposition)
        finally:
            self.pyfile.size = self.req.size

        if newname:
            newname = urlparse.urlparse(newname).path.split('/')[-1]

            if disposition and newname != name:
                self.log_info(_("%(name)s saved as %(newname)s") % {"name": name, "newname": newname})
                self.pyfile.name = newname
                filename = os.path.join(location, newname)

        fs_filename = fs_encode(filename)

        if self.core.config.get("permission", "change_file"):
            try:
                os.chmod(fs_filename, int(self.core.config.get("permission", "file"), 8))
            except Exception, e:
                self.log_warning(_("Setting file mode failed"), e)

        if self.core.config.get("permission", "change_dl") and os.name != "nt":
            try:
                uid = pwd.getpwnam(self.core.config.get("permission", "user"))[2]
                gid = grp.getgrnam(self.core.config.get("permission", "group"))[2]
                os.chown(fs_filename, uid, gid)

            except Exception, e:
                self.log_warning(_("Setting User and Group failed"), e)
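The permission handling itself is plain standard library code; a minimal sketch with hypothetical path, user, and group, assuming the config stores the mode as an octal string such as "0644":

import os
import pwd
import grp

mode = int("0644", 8)  #: octal string from the config -> 420, i.e. rw-r--r--
os.chmod("/downloads/example.bin", mode)  #: hypothetical path

uid = pwd.getpwnam("pyload").pw_uid  #: hypothetical user
gid = grp.getgrnam("pyload").gr_gid  #: hypothetical group
os.chown("/downloads/example.bin", uid, gid)  #: chown is not available on Windows (os.name == "nt")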