예제 #1
0
 def init_queue(self, queue):
     """Install ``queue`` as the queue stored on ``DownloadThread.__queue``.

     The attribute is assigned on the class, not on ``self``.

     NOTE(review): this listing is cut off in the middle of the last
     statement (``return Tr``), so the return values and whatever follows
     the ``if`` are not visible here -- confirm against the full source.
     """
     DownloadThread.__queue = queue
     # The success message is logged only for a truthy, non-empty queue.
     if DownloadThread.__queue and not DownloadThread.__queue.empty():
         logger_root.debug('Queue init succeed. Exiting ' + self.name)
         # NOTE(review): `Tr` is presumably a truncated `True` -- TODO confirm.
         return Tr
예제 #2
0
    def run(self):
        """Worker loop: pull ``(dir, url)`` entries from the shared queue and download each one.

        For every entry the URL is fetched with ``urllib2`` and written to
        ``tmp_dir + basename(url)`` in reads of at most 4096 bytes until
        ``Content-Length`` bytes have been received.  URLs already present in
        ``download_list`` and files that already exist at
        ``dir + basename(url)`` are not downloaded.  Failures are logged and,
        in most handlers, the ``(dir, url)`` pair is added to ``error_list``.

        The loop ends when the queue is empty, when ``get`` fails, or when a
        handler breaks out (the ``exitFlag`` path or the ``IOError`` handler).

        NOTE(review): this listing carries extraction markers and appears to
        be missing the lines between the last ``except`` handler and the
        ``finally:`` clause, so the normal-completion path (presumably the
        move into the destination directory) is not visible -- confirm against
        the full source.
        """
        logger_root.debug('Starting ' + self.name)
        global tmp_dir
        global queue_lock
        global error_list
        global succeed_list
        global download_list
        global exitFlag
        global err_exit

        while True:
            # queue_lock is held around the empty()/get() pair; every path below
            # releases it before the download starts.
            queue_lock.acquire()
            if not DownloadThread.__queue.empty():
                try:
                    self.q_set = DownloadThread.__queue.get(block=False)
                # NOTE(review): bare except -- any failure of get(), not only an
                # empty queue, silently ends this worker.
                except:
                    queue_lock.release()
                    break
                else:
                    queue_lock.release()
                # print self.q_set      # print the queue entry
                # A queue entry is indexed as [0] = destination directory, [1] = URL.
                self.dir = self.q_set[0]
                self.url = self.q_set[1]
                fname = os.path.basename(self.url)
                # NOTE(review): this membership test runs without queue_lock while the
                # add() below is locked, so two workers could presumably both pass it
                # for the same URL -- confirm whether duplicates can be queued.
                if self.url in download_list:    #if os.path.exists(tmp_dir + fname): replaced by the newer, more precise download_list check
                    logger_root.warning('%s duplicate download items %s.' % (self.name, self.url))
                # Paths are built by plain concatenation; assumes self.dir and tmp_dir
                # end with a path separator -- TODO confirm.
                elif not os.path.exists(self.dir + fname):
                    # Mark the URL as in progress before starting the transfer.
                    queue_lock.acquire()
                    download_list.add(self.url)
                    queue_lock.release()
                    logger_root.info('%s start download %s.' % (self.name, self.url))
                    try:
                        # The Host header is set explicitly from the URL's network location.
                        host = urllib2.urlparse.urlparse(self.url).netloc
                        headers = {'Host': host,
                                   'User-Agent':'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.101 Safari/537.36',
                                   'Accept':'*/*',
                                   'Connection':'keep-alive'
                                  }
                        req = urllib2.Request(self.url,headers=headers)
                        handle = urllib2.urlopen(req, timeout=120)
                        # NOTE(review): both header lookups presumably raise when the header
                        # is absent; that would land in the generic `except Exception`
                        # handler below, whose os.remove() targets a temp file that has
                        # not been created yet at this point -- confirm.
                        etag = handle.headers['etag'].strip('"')
                        s_length = int(handle.headers["Content-Length"].strip('"'))
                        # s_length: size announced by the server; d_length: bytes written so far.
                        d_length = 0
                        with open(tmp_dir + fname, 'wb') as f_handler:
                            while True:
                                # Cooperative shutdown: handled by the KeyboardInterrupt
                                # branch below.
                                if exitFlag:
                                    raise KeyboardInterrupt
                                # Ask for at most 4096 bytes and never more than what is
                                # still outstanding according to Content-Length.
                                buf = 4096 if s_length - d_length > 4096 else s_length - d_length
                                # Nothing outstanding: the announced size has been received.
                                if buf == 0:
                                    f_handler.flush()
                                    break
                                chunk = handle.read(buf)
                                # if not chunk:   # replaced: completion is now judged by Content-Length minus the bytes downloaded
                                #     break
                                # An empty read before the announced size is reached is
                                # treated as a network failure.
                                if not chunk and s_length != d_length:
                                    raise Exception, 'Network failure appeared in the process of download %s.' % self.url
                                f_handler.write(chunk)
                                f_handler.flush()
                                d_length += len(chunk)
                    # Shutdown path: raised by the exitFlag check in the copy loop above.
                    except KeyboardInterrupt:
                        while not f_handler.closed:
                            time.sleep(1)
                        # Keep the file only if its hash already matches the ETag;
                        # otherwise discard the partial download.
                        if self.check_file(tmp_dir + fname, etag):
                            move(tmp_dir + fname, self.dir + fname)
                            succeed_list.add(self.url)
                            logger_root.info('%s Successful download %s.' % (self.name, self.url))
                        else:
                            os.remove(tmp_dir + fname)
                            # error_list.add((self.dir, self.url))
                            logger_root.warning('%s stop download %s' % (self.name, self.url))
                        # Leave the worker loop.
                        break
                    # Record the failure, release the in-progress mark and move on to
                    # the next queue entry.
                    except URLError, e:
                        logger_root.error('%s %s %s' % (self.name, self.url, str(e)))
                        error_list.add((self.dir, self.url))
                        queue_lock.acquire()
                        download_list.discard(self.url)
                        queue_lock.release()
                        continue
                    # NOTE(review): this handler and the two below call os.remove()
                    # unconditionally; if the failure happened before the temp file was
                    # opened, os.remove() itself would raise from inside the handler --
                    # confirm whether that can occur.
                    except socket.timeout, e:
                        os.remove(tmp_dir + fname)
                        logger_root.error('%s %s %s' % (self.name, self.url, str(e)))
                        error_list.add((self.dir, self.url))
                    # Unlike the other handlers, this one does not add to error_list and
                    # it ends the worker loop.
                    except IOError, e:
                        os.remove(tmp_dir + fname)
                        logger_root.error('%s %s %s' % (self.name, self.url, str(e)))
                        print traceback.format_exc()
                        break
                    # Catch-all: drop the temp file, record the failure and keep looping.
                    except Exception, e:
                        os.remove(tmp_dir + fname)
                        logger_root.error('%s %s %s' % (self.name, self.url, str(e)))
                        error_list.add((self.dir, self.url))
                        print traceback.format_exc()
예제 #3
0
                            # NOTE(review): the statement that opens this branch is missing from
                            # the listing; judging by the message it covers a download whose byte
                            # count differs from Content-Length -- confirm against the full source.
                            error_list.add((self.dir, self.url))
                            logger_root.error('%s Incomplete download %s, source file length is %s, downloaded file length is %s.' % (self.name, self.url, s_length, d_length))
                    # Cleanup for the download attempt: close the response and clear the
                    # URL's in-progress mark under queue_lock.
                    finally:
                        try:
                            # If close() raises (or `handle` was never bound), the discard
                            # below is skipped as well and the error is swallowed.
                            handle.close()
                            queue_lock.acquire()
                            download_list.discard(self.url)
                            queue_lock.release()
                        except Exception, e:
                            # logger_root.error('try_finally %s %s %s.' % (self.name, self.url, str(e)))
                            pass
            # Queue is empty: release the lock taken at the top of the loop and stop.
            else:
                queue_lock.release()
                break
        # Log why the worker is ending.
        if exitFlag:
            logger_root.debug('receive a signal to exit, [%s] stop.' % self.name)
        else:
            logger_root.debug('[%s] exit.' % self.name)

    def check_file(self, file, etag):
        """Tell whether the file at ``file`` hashes to the expected ``etag``.

        The hash is computed by ``get_qetag`` and compared with ``etag`` for
        equality.

        :param file: path handed to ``get_qetag``.
        :param etag: expected value to compare the computed hash against.
        :return: ``True`` on a match, ``False`` otherwise.
        """
        # A mismatch is signalled through the return value only; nothing is
        # logged here.
        if get_qetag(file) == etag:
            return True
        return False


def get_error_list():
    ret = []