def fetch(self):
    """Download the byte range described by ``self.window`` into that window.

    Sets the ``Range`` and ``Connection: close`` entries of ``self.headers``,
    then tries the request up to ``self.max_retry`` times, sleeping two
    seconds between failed attempts. Before each attempt ``self.stopped`` is
    checked and the method returns early if it is set.

    On success the number of received bytes is added to ``self.finished``,
    the measured throughput is passed to ``SlidingWindow.adjust_wnd_size``
    and a progress line is logged. If every attempt fails, the failure is
    logged and the method returns normally; no exception is propagated for
    failed attempts.
    """
    from_ = self.window.first
    to = self.window.last
    expected = to - from_ + 1  # the requested range is inclusive on both ends
    self.headers['Range'] = 'bytes=%d-%d' % (from_, to)
    self.headers['Connection'] = 'close'
    for i in xrange(self.max_retry):
        if self.stopped.is_set():
            return
        try:
            time_start = time.time()
            conn = None
            try:
                conn = self.conn = create_connection(
                    self.method, self.url, self.payload, self.headers)
                self.response = conn.getresponse()
                # NOTE: the response status is not validated here (a check
                # for 206 was disabled in the original code).
                self.window.fill(self.response.read())
            finally:
                # Close on failure as well, so that a failed attempt does
                # not leak its connection before the retry opens a new one.
                if conn is not None:
                    conn.close()
            received = self.content_length()
            # Explicit raise rather than `assert`: asserts are removed when
            # Python runs with -O, which would silently accept short reads.
            if received != expected:
                raise IOError(
                    'expected %d, received %d' % (expected, received))
            time_elapsed = time.time() - time_start
        except Exception:
            # The last attempt is reported by the for/else clause below.
            if i != self.max_retry - 1:
                logging.exception(
                    'The #%d attempt to fetch %d-%d failed, try again.',
                    i + 1, from_, to)
                time.sleep(2)
        else:
            self.finished += received
            if time_elapsed > 0:
                speed = received / time_elapsed
                SlidingWindow.adjust_wnd_size(speed * SOCKET_TIMEOUT_SEC)
            else:
                # The clock did not advance, so the throughput cannot be
                # measured: leave the window size alone instead of dividing
                # by zero (the speed is then logged as 0).
                speed = 0.0
            logging.info(
                'Content-Range %s, "%s" at %.2fKB/s, finished %.2f%%',
                self.getheader('Content-Range')[6:], self.url,
                speed / 1024, 100 * float(self.finished) / self.tot_size)
            return
    else:
        # Reached only when the loop ran out of attempts without returning.
        logging.exception('>>>>>>>>>>>>>>> Range Fetch failed(%r) %d-%d',
                          self.url, from_, to)
def spawn(self):
    """Start the background range fetches and yield data from full windows.

    This is a generator: nothing happens until it is first iterated. It then
    closes ``self.conn``, builds a ``SlidingWindow`` from ``fetch_from()``,
    ``content_length()`` and ``THREAD_POOL_SIZE``, and starts a thread that
    queues one ``RangeFetch.fetch`` task on ``task_queue`` for every window
    produced by ``available_window()``. The generator itself yields each
    item produced by ``full_window()``.
    """
    self.conn.close()  # ensure closed

    offset = self.fetch_from()
    total = self.content_length()
    windows = SlidingWindow(offset, total, THREAD_POOL_SIZE)

    # A plain int would be passed by value, but every fetcher has to update
    # the same counter, so one shared object is created here.
    progress = AtomicInt(PRE_READ_SIZE)

    def new_fetcher(wnd):
        # Each fetcher gets its own copy of the headers: fetchers modify
        # their headers from several threads, so the dict cannot be shared.
        return RangeFetch(self.method, self.url, self.payload,
                          self.headers.copy(), wnd, progress, total,
                          self.stopped)

    def enqueue_all():
        for wnd in windows.available_window():
            task_queue.add_task(new_fetcher(wnd).fetch)

    dispatcher = threading.Thread(target=enqueue_all)
    dispatcher.start()

    for chunk in windows.full_window():
        yield chunk
def fetch(self):
    """Download the byte range described by ``self.window`` into that window.

    Sets the ``Range`` and ``Connection: close`` entries of ``self.headers``,
    then tries the request up to ``self.max_retry`` times, sleeping two
    seconds between failed attempts. Before each attempt ``self.stopped`` is
    checked and the method returns early if it is set.

    On success the number of received bytes is added to ``self.finished``,
    the measured throughput is passed to ``SlidingWindow.adjust_wnd_size``
    and a progress line is logged. If every attempt fails, the failure is
    logged and the method returns normally; no exception is propagated for
    failed attempts.
    """
    from_ = self.window.first
    to = self.window.last
    expected = to - from_ + 1  # the requested range is inclusive on both ends
    self.headers['Range'] = 'bytes=%d-%d' % (from_, to)
    self.headers['Connection'] = 'close'
    for i in xrange(self.max_retry):
        if self.stopped.is_set():
            return
        try:
            time_start = time.time()
            conn = None
            try:
                conn = self.conn = create_connection(
                    self.method, self.url, self.payload, self.headers)
                self.response = conn.getresponse()
                # NOTE: the response status is not validated here (a check
                # for 206 was disabled in the original code).
                self.window.fill(self.response.read())
            finally:
                # Close on failure as well, so that a failed attempt does
                # not leak its connection before the retry opens a new one.
                if conn is not None:
                    conn.close()
            received = self.content_length()
            # Explicit raise rather than `assert`: asserts are removed when
            # Python runs with -O, which would silently accept short reads.
            if received != expected:
                raise IOError(
                    'expected %d, received %d' % (expected, received))
            time_elapsed = time.time() - time_start
        except Exception:
            # The last attempt is reported by the for/else clause below.
            if i != self.max_retry - 1:
                logging.exception(
                    'The #%d attempt to fetch %d-%d failed, try again.',
                    i + 1, from_, to)
                time.sleep(2)
        else:
            self.finished += received
            if time_elapsed > 0:
                speed = received / time_elapsed
                SlidingWindow.adjust_wnd_size(speed * SOCKET_TIMEOUT_SEC)
            else:
                # The clock did not advance, so the throughput cannot be
                # measured: leave the window size alone instead of dividing
                # by zero (the speed is then logged as 0).
                speed = 0.0
            logging.info(
                'Content-Range %s, "%s" at %.2fKB/s, finished %.2f%%',
                self.getheader('Content-Range')[6:], self.url,
                speed / 1024, 100 * float(self.finished) / self.tot_size)
            return
    else:
        # Reached only when the loop ran out of attempts without returning.
        logging.exception('>>>>>>>>>>>>>>> Range Fetch failed(%r) %d-%d',
                          self.url, from_, to)