Ejemplo n.º 1
0
    def execute(self):
        """Run the job's action with its stored arguments, reporting failures.

        Prints a start banner containing ``self.id``, the value returned by
        ``get_time()`` and ``Job.UNFINISHED_COUNT``, then calls
        ``self.action(*self.args, **self.kwargs)``.  Any ``Exception`` raised
        by the action is passed to ``print_exc`` and is not re-raised, so a
        failing job does not propagate its error to the caller.
        """
        # Single-argument print(...) and "except ... as" are valid on both
        # Python 2.6+ and Python 3; the statement forms are Python 2 only.
        print("\t Job #%s STARTED @ %s -- Remaining: %s." % (self.id, get_time(), Job.UNFINISHED_COUNT))
        try:
            self.action(*self.args, **self.kwargs)
        except Exception as e:
            # NOTE(review): if print_exc is traceback.print_exc, its first
            # positional parameter is `limit`, not the exception -- confirm
            # which print_exc is in scope here.
            print_exc(e)
Ejemplo n.º 2
0
def _filename_from_content_disposition(header_value):
    """Extract the ``filename`` parameter from a Content-Disposition value.

    Returns the file name with surrounding quotes removed, or ``None`` when
    the header is missing or carries no usable ``filename=`` parameter.
    """
    marker = 'filename='
    if not header_value or marker not in header_value:
        return None
    candidate = header_value.split(marker, 1)[1].strip()
    if candidate[:1] in ('"', "'"):
        # Quoted value: keep what lies between the opening quote and its
        # match, ignoring any parameters that follow the closing quote.
        closing = candidate.find(candidate[0], 1)
        if closing == -1:
            candidate = candidate[1:]
        else:
            candidate = candidate[1:closing]
    else:
        # Unquoted value ends at the next parameter separator.
        candidate = candidate.split(';', 1)[0].strip()
    return candidate or None


def download_file(url,
        base_path = None,
        localFileName = None,
        cookie = None,
        referer = None,
        download_logs = None,
        observer = None):
    """Stream ``url`` to a temporary file and move it under ``base_path``.

    Parameters:
        url: address fetched with ``requests.get(..., stream=True)``.
        base_path: destination directory.  When falsy, the destination is
            just the file name (i.e. relative to the working directory).
        localFileName: when given, used as the file name instead of the one
            derived from the Content-Disposition header or the URL.
        cookie: value sent as the ``Cookie`` request header.
        referer: value sent as the ``Referer`` header; defaults to ``url``.
        download_logs: handed to ``was_downloaded`` / ``update_log``.
        observer: optional object whose ``notify('progress:change', pct)``
            is called as the percentage grows.

    Returns:
        ``False`` when the download is skipped (already logged, or a local
        file at least as large as the announced size exists).  Otherwise
        ``None``, both when the file was saved and when the transfer was
        reported incomplete (the partial temp file is left in place).
    """

    def emit(progress):
        # Progress reporting is optional.
        if observer:
            observer.notify('progress:change', progress)

    print("\t\t|- Downloading...: %s" % url)
    #===========================================================================
    # create http request
    #===========================================================================
    headers = {
        'Cookie': cookie,
        'Referer': referer or url,
        'User-Agent': DESKTOP_USER_AGENT
    }
    response = requests.get(url, headers = headers, stream = True)

    # With stream=True the connection stays checked out until the body is
    # consumed or the response is closed, so close it on every exit path.
    try:
        #=======================================================================
        # Figure out filename
        #=======================================================================
        if localFileName:
            # we can force to save the file as specified name
            localName = localFileName
        else:
            # Prefer the name announced by the server, if any.
            localName = _filename_from_content_disposition(
                response.headers.get('Content-Disposition'))
            if not localName:
                if response.url != url:
                    # if we were redirected, take the name from the final URL
                    localName = url2name(response.url)
                else:
                    localName = url2name(url)

        #=======================================================================
        # create full path
        #=======================================================================
        localName = safe_filename(localName)
        if not base_path:
            # No destination directory given: avoid indexing None / ''.
            file_path = localName
        elif base_path[-1] == '/':
            file_path = '%s%s' % (base_path, localName)
        else:
            file_path = '%s/%s' % (base_path, localName)
        temp_path = '../src/temp/' + localName

        # Check the file has not been downloaded.
        if download_logs and was_downloaded(download_logs, localName):
            print("\t\t\t   |- Already downloaded: %s" % localName)
            emit(100)
            return False

        #=======================================================================
        # download and save the file
        #=======================================================================
        print("\t\t\t|- Save file: %s" % file_path)
        content_length = response.headers.get('content-length')
        size_known = content_length is not None
        # Keep the historical fallback of 1 for the "already on disk" check;
        # progress and completeness only use the size when it is known.
        response_size = int(content_length) if size_known else 1
        if os.path.exists(file_path):
            local_size = os.path.getsize(file_path)
            print("\t\t\t   |- local_size (%s) >= response_size (%s) :: %s" % (local_size, response_size, local_size >= response_size))
            if local_size >= response_size:
                print("\t\t\t|- Already finished: %s" % localName)
                # NOTE(review): download_logs may be None here (it is only
                # guarded for was_downloaded above) -- confirm update_log
                # accepts that.
                update_log(download_logs, localName)
                emit(100)
                return False

        # Create the temp folder, if not there
        temp_dir = os.path.dirname(temp_path)
        if temp_dir and not os.path.isdir(temp_dir):
            os.makedirs(temp_dir)

        #=======================================================================
        # progressive download
        #=======================================================================
        start_time = time()
        bytes_received = 0
        # 1 megabyte = 1 048 576 bytes
        # 100 kilobytes = 102 400 bytes
        chunk_size = 102400
        old_percent = -1
        no_amount_tries = 0
        MAX_RETRIES = 5
        # NOTE(review): iter_content yields decoded bytes when the server
        # applies a Content-Encoding, so the received count may differ from
        # Content-Length in that case -- confirm against the servers used.
        with open(temp_path, 'wb') as file_handle:
            for chunk in response.iter_content(chunk_size = chunk_size):
                file_handle.write(chunk)
                # progress tracking
                bytes_len = len(chunk)
                bytes_received += bytes_len
                if size_known and response_size > 0:
                    new_percent = int(float(bytes_received) / response_size * 100)
                    if new_percent > old_percent:
                        emit(new_percent)
                        print("\t\t\t\t%s :: %s%%  @ %s" % (localName, new_percent, get_time()))
                    old_percent = new_percent

                # we want to detect if we are not getting any data.
                if bytes_len == 0:
                    no_amount_tries += 1
                else:
                    no_amount_tries = 0

        # Conclude
        # Without Content-Length there is nothing to compare against, so the
        # transfer is only judged by the empty-chunk counter.
        incomplete = size_known and bytes_received != response_size
        if no_amount_tries >= MAX_RETRIES or incomplete:
            print("\n\t\t\t |- FAILURE DUE TO INCOMPLETE :: %s / %s" % (format_filesize(bytes_received), format_filesize(response_size)))
        else:
            end_time = time()
            update_log(download_logs, localName)
            shutil.move(temp_path, file_path)
            total_size = response_size if size_known else bytes_received
            if not size_known:
                # No percentages could be computed; still signal completion.
                emit(100)
            print("\n\t\t\t |- FINISHED %s :: %s / %s in %d seconds." % (localName, format_filesize(bytes_received), format_filesize(total_size), end_time - start_time))
    finally:
        response.close()