def __init__(self, name, num_threads=None):
    """Initialise the thread and create its worker pool.

    Args:
        name: Name passed through to ``threading.Thread``.
        num_threads: Size of the pool.  When ``None``, the value is read
            from ``SystemOption.get(DBSession()).inventory_threads``.
    """
    threading.Thread.__init__(self, name=name)

    if num_threads is not None:
        pool_size = num_threads
    else:
        # No explicit size given: use the configured inventory thread count.
        pool_size = SystemOption.get(DBSession()).inventory_threads

    # Set up the thread pool
    self.pool = Pool(pool_size)
def __init__(self, name, num_threads=None):
    """Initialise the thread and create its worker pool.

    Args:
        name: Name passed through to ``threading.Thread``.
        num_threads: Size of the pool.  When ``None``, the value is read
            from the ``install_threads`` system option.
    """
    threading.Thread.__init__(self, name=name)

    # The session is obtained up front, whether or not it ends up being used.
    session = DBSession()
    pool_size = (
        SystemOption.get(session).install_threads
        if num_threads is None
        else num_threads
    )

    # Set up the thread pool
    self.pool = Pool(pool_size)
def fetch():
    """Fetch the OSTEP index page and queue a job for every linked file.

    The index HTML is retrieved with ``get_html_content``.  The function then
    walks every ``<td>`` whose ``bgcolor`` matches one of a fixed list of
    colours, in list order:

    * a cell with non-empty bold text switches the current output directory
      to ``<ROOTDIR>/<colour index>-<bold text>`` (created if missing);
    * a cell with a ``<small>`` tag restarts the running file number at that
      tag's integer value plus one;
    * a cell with a link submits ``work`` to the pool with the link's URL,
      the request headers and a numbered destination path, then advances
      the running number.

    Exits the process with status 1 if the index page cannot be retrieved.

    Raises:
        ValueError: If a ``<small>`` tag's text is not an integer.
    """
    url = "http://pages.cs.wisc.edu/~remzi/OSTEP/"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36"
    }

    try:
        content = get_html_content(url, headers)
    except FetchFailed:
        print(f"Error: failed to get response from {url=}", file=sys.stderr)
        # sys.exit rather than the site-provided exit(), which is intended for
        # the interactive shell and is not guaranteed to exist.
        sys.exit(1)
    except Exception as e:
        print(f"Error: failed to run, caused by \"{e}\"", file=sys.stderr)
        sys.exit(1)

    root_dir = ROOTDIR
    # exist_ok avoids the check-then-create race of isdir() + mkdir().
    os.makedirs(root_dir, exist_ok=True)

    soup = BeautifulSoup(content, "lxml")
    background_colors = ["yellow", "#f88017", "#00aacc", "#4CC417", "#3EA99F"]
    number = 0
    # Links seen before any bold heading land directly in the root directory.
    sub_dir = root_dir

    pool = Pool(10)
    pool.run()
    # NOTE(review): nothing below waits for the submitted jobs; presumably
    # Pool takes care of completion itself -- confirm.
    for idx, color in enumerate(background_colors):
        for td in soup.find_all("td", bgcolor=color):
            if (b := td.b) and b.text:
                # NOTE(review): heading text and href come from the remote
                # page and are used verbatim in local paths -- confirm they
                # cannot contain path separators.
                sub_dir = f"{root_dir}/{idx}-{b.text}"
                os.makedirs(sub_dir, exist_ok=True)
            if s := td.small:
                number = int(s.text) + 1
            if a := td.a:
                name = f"{str(number).zfill(2)} {a['href']}"
                fpath = f"{sub_dir}/{name}"
                file_url = f"{url}{a['href']}"
                pool.submit(work, url=file_url, headers=headers, fpath=fpath)
                number += 1
if p.ref: b = p.process(out[p.ref.idx], idx=j) else: b = p.process(buf, idx=j) except NormalizeException, e: #mark errors and report them later. we are in threading unfortunately self.errors.append(j + 1) out.append(None) continue out.append(b) if p.save: print "saving file", f_out, p.save write_file(f_out % str(p), b) p = Pool(4) try: p.map( handle, enumerate( self.path_iter(self.path, os.path.join(self.path, "%s")))) except Exception, e: p.terminate() print self.errors raise e print "handling" print self.errors p.terminate() for i, p in enumerate(self.processors): p.finalize()