def start(self):
    """Queue a virus-scan task for every repo whose head commit differs from its scanned commit.

    Does nothing (and leaves the database handle untouched) when
    ``self.db_oper.is_enabled()`` is false.  Otherwise the repo list is
    fetched, a pool running ``self.scan_virus`` with
    ``self.settings.threads`` workers is started, one ``ScanTask`` is queued
    per changed repo, and the call blocks on ``thread_pool.join()``.

    ``self.db_oper.close_db()`` is always called once the repo list has been
    requested, including when fetching it or driving the pool raises.
    """
    if not self.db_oper.is_enabled():
        return

    try:
        repo_list = self.db_oper.get_repo_list()
        if repo_list is None:
            return

        thread_pool = ThreadPool(self.scan_virus, self.settings.threads)
        thread_pool.start()

        for repo_id, head_commit_id, scan_commit_id in repo_list:
            if head_commit_id == scan_commit_id:
                # Head already scanned: nothing new to look at.
                logger.debug('No change occur for repo %.8s, skip virus scan.', repo_id)
                continue

            thread_pool.put_task(
                ScanTask(repo_id, head_commit_id, scan_commit_id))

        thread_pool.join()
    finally:
        # Release the DB handle on every exit path so an exception does not leak it.
        self.db_oper.close_db()
def download():
    """Download every enabled topic page and save the collected lines to a timestamped file.

    For each truthy entry in ``download_enables`` a ``download_concrete_page``
    task is handed to a thread pool together with the matching entry of
    ``download_hrefs`` and the shared ``lines`` list.  After the pool is
    joined, the lines are written (newline-separated) to
    ``"<dd-mm-YYYY HH-MM-SS>.txt"`` if at least one topic was enabled;
    otherwise the user is told nothing was chosen.

    Returns:
        True.
    """
    lines = ["Topics"]
    thread_pool = ThreadPool()
    anything_chosen = False

    if enable_proxie[0]:
        refresh_proxie()

    # The file name carries the time the download was started, not finished.
    filename = datetime.datetime.now().strftime("%d-%m-%Y %H-%M-%S") + '.txt'

    for i, enable in enumerate(download_enables):
        if enable:
            thread_pool.give_task(download_concrete_page, args=(download_hrefs[i], lines))
            anything_chosen = True
    thread_pool.join()

    if anything_chosen:
        # Context manager guarantees the handle is closed even if write() fails.
        with open(filename, 'w') as file:
            file.write('\n'.join(lines))
        print(f'All chosen topics are saved to {filename}')
        to_main_menu()
    else:
        print("Nothing is chosen")
        input("Press <Enter> to continue")
    # NOTE(review): the original layout was lost; this return is assumed to sit
    # at function level (both branches return True) - confirm against callers.
    return True
def run(self):
    """Queue a job for every submission id that is listed in the id file but not yet in the ``code`` table.

    Ids are read from ``FILE_SUBMISSION_ID`` (one integer per line; blank
    lines are ignored).  Ids already returned by
    ``select submission_id from code`` are skipped.  Jobs are appended to a
    20-worker pool with a 0.2 second pause before each one, then the pool is
    joined and stopped.
    """
    # cx is kept bound so the connection object stays referenced while cu is used.
    cx, cu = self.db_connection()
    pool = ThreadPool(size=20)
    pool.start()

    # Read the id list inside a context manager so the file handle is released.
    with open(FILE_SUBMISSION_ID) as file_submission_id:
        all_submissions = [int(item) for item in file_submission_id if item.strip()]
    finished_submissions = [int(item[0]) for item in cu.execute("select submission_id from code")]

    for line in set(all_submissions).difference(finished_submissions):
        sleep(0.2)
        # NOTE(review): the job comes from the module-level `s`, not from `self` - confirm intended.
        pool.append_job(s.job, line)

    pool.join()
    pool.stop()
def start_tasks():
    """Create one FetcherTask per loaded store, push them all to a 20-slot pool and wait for it.

    Each task receives its 1-based position (``num``) and the total number
    of stores.
    """
    thread_pool = ThreadPool(size=20)
    store_list = load_stores()
    total_count = len(store_list)

    for number, store in enumerate(store_list, 1):
        thread_pool.push_task(
            FetcherTask(store=store, num=number, total=total_count))

    # Tasks are queued first; only then is the pool initialised and started.
    thread_pool.init_pool()
    thread_pool.start()
    print('Waiting Task Finished......')
    thread_pool.join()
def main():
    """Create one SlotStateFetchTask per loaded store, run them on a 20-slot pool and wait.

    Each task is given its 1-based position (``pos``) and the total store count.
    """
    store_list = load_stores()
    thread_pool = ThreadPool(size=20)
    total = len(store_list)

    for position, store in enumerate(store_list, 1):
        thread_pool.push_task(
            SlotStateFetchTask(store, pos=position, total=total))

    # All tasks are queued before the pool is initialised and started.
    thread_pool.init_pool()
    thread_pool.start()
    print('Waiting for tasks exit!!!')
    thread_pool.join()
def start_tasks():
    """Create one UnderLoadSlotZeroTask per loaded store, run them on a 20-slot pool and wait.

    Each task is given the total store count and its 1-based position (``pos``).
    """
    stores = load_stores()
    thread_pool = ThreadPool(size=20)
    total = len(stores)

    for position, store in enumerate(stores, 1):
        thread_pool.push_task(
            UnderLoadSlotZeroTask(store=store, total=total, pos=position))

    # All tasks are queued before the pool is initialised and started.
    thread_pool.init_pool()
    thread_pool.start()
    print('Waiting for task exit!')
    thread_pool.join()
def main():
    """Create one CompensationDisableTask per loaded store, run them on a 20-slot pool and wait.

    Each task is given its 1-based ``index`` and the total store count.
    """
    store_list = load_stores()
    thread_pool = ThreadPool(size=20)
    total = len(store_list)

    for position, store in enumerate(store_list, 1):
        thread_pool.push_task(
            CompensationDisableTask(store=store, index=position, total=total))

    # All tasks are queued before the pool is initialised and started.
    thread_pool.init_pool()
    print('Starting tasks...')
    thread_pool.start()
    print('Waiting for task exit!')
    thread_pool.join()
def compile(self, jobs=0):
    """Build every target in ``self.targets``, compiling its sources on a thread pool.

    Args:
        jobs: number of pool workers; ``0`` means use ``cpu_count()``.

    For each target a fresh pool is created, one ``compile_object`` call per
    source is submitted asynchronously (with ``compile_object_done`` as the
    callback), and ``_wait_for_compilation`` is awaited.  The pool is closed
    and joined whether or not waiting raised; an exception raised while
    waiting (e.g. ``BuildError``) propagates to the caller and the target's
    final steps are skipped.
    """
    if jobs == 0:
        jobs = cpu_count()

    self.print_msg('BS', 'Using %s parallel job(s)' % colored(str(jobs), 'yellow'))

    for target in self.targets:
        self.print_msg('BS', 'Building target %s' % colored(target.name, 'yellow'))

        pool = ThreadPool(jobs)
        for source in target.sources:
            pool.apply_async(
                self.compile_object,
                args=(target, source, None),
                callback=self.compile_object_done)

        try:
            self._wait_for_compilation(target)
        finally:
            # Always drain the pool, even when the build failed.
            pool.close()
            pool.join()

        self.run_prefinal(target)
        target.final(self)
# Crawl every user's profile on the three judges in parallel, then store
# each crawl result through the DB handler.
d = DBHandler()
s = Spider()
user_list = d.get_user_list()
pool = ThreadPool(size=10)
pool.start()


def add_username(func, username, oj_username):
    """Call ``func(oj_username)`` and tag the returned mapping with ``username``.

    The local username is attached so the result can be matched back to the
    user once it is taken out of the pool's result queue.
    """
    data = func(oj_username)
    data["username"] = username
    return data


# user[0] is passed as the local username; user[1..3] are passed to the
# bestcoder / codefoces / hduoj crawlers respectively.
for user in user_list:
    for crawler, column in ((s.bestcoder, 1), (s.codefoces, 2), (s.hduoj, 3)):
        pool.append_job(add_username, crawler, user[0], user[column])

pool.join()
pool.stop()

# Drain the result queue: each result fills exactly one website slot, the
# other two are passed as empty dicts.
while not pool.results.empty():
    result = pool.results.get()
    username = result["username"]
    kwargs = {"bestcoder": {}, "codeforces": {}, "hduoj": {}}
    kwargs[result["website"]] = result
    d.save_user_info(username, **kwargs)
c = item[i].decode("gb2312") if i == 0: l.append(c) else: if c[0] == "&": l.append(0) else: l.append(1) rooms.append(l) with open( "data/" + campus + "." + building + "." + week + "." + week_day + ".json", "w") as f: f.write(json.dumps(rooms)) print "finish: week:" + week + " week_day:" + week_day return "success" if __name__ == "__main__": s = Spider() s.cookies = {"JSESSIONID": "8B7DA565F71772D37B04170241A757A8.TAB2;"} pool = ThreadPool(size=20) pool.start() for week in range(1, 21): for week_day in range(1, 8): print "start week:" + str(week) + " week_day:" + str(week_day) # 请自行确定info.py中的校区id和教学楼id是正确的 # 然后按照info.py中的数据修改校区和教学楼id pool.append_job(s.craw, "1709", "1783", str(week), str(week_day)) pool.join()
class ContentScan(object):
    """Scan the changed files of every non-virtual repo and persist the findings.

    A pass (``start`` / ``do_scan_task``) queues one ``ScanTask`` per repo on
    a thread pool whose worker function is ``diff_and_scan_content``.  Each
    worker diffs the last scanned commit against the current one, keeps the
    stored ``ContentScanResult`` rows in sync with renames/moves/deletions,
    scans added and modified files through the supplied client, and stamps
    the repo's ``ContentScanRecord`` with the timestamp of the pass.
    """

    def __init__(self):
        """Create and start the worker pool (``appconfig.thread_num`` workers)."""
        self.thread_pool = ThreadPool(self.diff_and_scan_content, appconfig.thread_num)
        self.thread_pool.start()

    def start(self):
        """Run one scan pass; any exception is logged as a warning instead of raised."""
        try:
            self.do_scan_task()
        except Exception as e:
            logging.warning('Error: %s', e)

    def do_scan_task(self):
        """Queue a scan task per repo, wait for the workers, then purge stale rows.

        After the workers finish, every ``ContentScanRecord`` whose timestamp
        differs from this pass's timestamp is deleted, followed by every
        ``ContentScanResult`` whose repo no longer has a record.  Both
        database sessions are closed even when the pass fails.  The pool is
        joined with ``stop=True`` at the end of a successful pass.
        """
        logging.info("Start scan task..")
        time_start = time.time()

        # Whole-second timestamp identifying this pass; workers write it to
        # every record they touch and the cleanup below compares against it.
        self.dt = datetime.utcnow().replace(microsecond=0)

        edb_session = appconfig.session_cls()
        seafdb_session = None
        try:
            seafdb_session = appconfig.seaf_session_cls()

            # Get repo list from seafile-db: Branch rows that have no
            # matching VirtualRepo row (outer join + NULL filter).
            Branch = SeafBase.classes.Branch
            VirtualRepo = SeafBase.classes.VirtualRepo
            q = seafdb_session.query(Branch.repo_id, Branch.commit_id)
            q = q.outerjoin(VirtualRepo, Branch.repo_id == VirtualRepo.repo_id)
            # `== None` is deliberate: it is a column expression, not a Python comparison.
            q = q.filter(VirtualRepo.repo_id == None)  # noqa: E711

            for row in q.all():
                record = (edb_session.query(ContentScanRecord.commit_id)
                          .filter(ContentScanRecord.repo_id == row.repo_id)
                          .first())
                last_commit_id = record[0] if record else None
                self.put_task(row.repo_id, last_commit_id, row.commit_id)

            # Remove deleted repo's record after all threads finished
            self.thread_pool.join()

            q = edb_session.query(ContentScanRecord)
            q = q.filter(ContentScanRecord.timestamp != self.dt)
            q.delete()

            q = edb_session.query(ContentScanResult)
            subquery = edb_session.query(ContentScanRecord.repo_id)
            q = q.filter(ContentScanResult.repo_id.notin_(subquery))
            # The criteria contain a subquery, so matched rows must be fetched.
            q.delete(synchronize_session='fetch')
            edb_session.commit()
        finally:
            edb_session.close()
            if seafdb_session is not None:
                seafdb_session.close()

        logging.info('Finish scan task, total time: %s seconds\n', str(time.time() - time_start))

        self.thread_pool.join(stop=True)

    def diff_and_scan_content(self, task, client):
        """Worker entry point: process one repo described by ``task``.

        Args:
            task: object exposing ``repo_id``, ``last_commit_id`` and
                ``new_commit_id``.
            client: scanner exposing ``scan(content)``; a non-empty dict
                return value is stored as a finding.

        The worker opens its own database session and always closes it,
        including when loading, diffing or scanning raises.
        """
        edb_session = appconfig.session_cls()
        try:
            self._process_repo(edb_session, client, task.repo_id,
                               task.last_commit_id, task.new_commit_id)
        finally:
            edb_session.close()

    def _process_repo(self, edb_session, client, repo_id, last_commit_id, new_commit_id):
        """Diff ``last_commit_id`` against ``new_commit_id`` and record scan findings."""
        # repo not changed, update timestamp
        if last_commit_id == new_commit_id:
            q = edb_session.query(ContentScanRecord)
            q = q.filter(ContentScanRecord.repo_id == repo_id,
                         ContentScanRecord.commit_id == last_commit_id)
            q.update({"timestamp": self.dt})
            edb_session.commit()
            return

        # diff: try repo version 1 first and fall back to version 0.
        version = 1
        new_commit = commit_mgr.load_commit(repo_id, version, new_commit_id)
        if new_commit is None:
            version = 0
            new_commit = commit_mgr.load_commit(repo_id, version, new_commit_id)
        if not new_commit:
            logging.warning('Failed to load commit %s/%s', repo_id, new_commit_id)
            return

        last_commit = None
        if last_commit_id:
            last_commit = commit_mgr.load_commit(repo_id, version, last_commit_id)
            if not last_commit:
                logging.warning('Failed to load commit %s/%s', repo_id, last_commit_id)
                return

        new_root_id = new_commit.root_id
        # A repo that was never scanned is diffed against the empty root.
        last_root_id = last_commit.root_id if last_commit else ZERO_OBJ_ID

        differ = CommitDiffer(repo_id, version, last_root_id, new_root_id,
                              True, False)
        added_files, deleted_files, added_dirs, deleted_dirs, modified_files,\
        renamed_files, moved_files, renamed_dirs, moved_dirs = differ.diff_to_unicode()

        # Handle renamed, moved and deleted files.
        q = edb_session.query(ContentScanResult).filter(ContentScanResult.repo_id == repo_id)
        results = q.all()
        if results:
            self._sync_stored_results(
                edb_session, repo_id, results,
                relocated_dirs=(renamed_dirs, moved_dirs),
                relocated_files=(renamed_files, moved_files),
                dropped_files=(deleted_files, modified_files))

        # scan added_files and modified_files by third-party API.
        files_to_scan = list(added_files)
        files_to_scan.extend(modified_files)
        scan_results = self._scan_files(client, repo_id, version, files_to_scan)

        for item in scan_results:
            detail = json.dumps(item["detail"])
            edb_session.add(
                ContentScanResult(repo_id, item["path"], appconfig.platform, detail))
        if scan_results:
            logging.info('Found %d new illegal files.', len(scan_results))

        # Update ContentScanRecord
        if last_commit_id:
            q = edb_session.query(ContentScanRecord).filter(ContentScanRecord.repo_id == repo_id)
            q.update({"commit_id": new_commit_id, "timestamp": self.dt})
        else:
            edb_session.add(ContentScanRecord(repo_id, new_commit_id, self.dt))

        edb_session.commit()

    def _sync_stored_results(self, edb_session, repo_id, results,
                             relocated_dirs, relocated_files, dropped_files):
        """Rewrite paths of stored results that moved and delete those that are obsolete."""
        # Snapshot of stored paths taken before any rename is applied.
        stored_paths = [row.path for row in results]
        stored_path_set = set(stored_paths)

        path_pairs_to_rename = []
        # renamed dirs, then moved dirs: every stored path below the old directory follows it.
        for dir_group in relocated_dirs:
            for changed_dir in dir_group:
                prefix = changed_dir.path + '/'
                prefix_len = len(prefix)
                for path in stored_paths:
                    if path[:prefix_len] == prefix:
                        new_path = changed_dir.new_path + '/' + path[prefix_len:]
                        path_pairs_to_rename.append((path, new_path))

        # renamed files, then moved files: exact path match.
        for file_group in relocated_files:
            for changed_file in file_group:
                if changed_file.path in stored_path_set:
                    path_pairs_to_rename.append((changed_file.path, changed_file.new_path))

        for old_path, new_path in path_pairs_to_rename:
            q = edb_session.query(ContentScanResult)
            q = q.filter(ContentScanResult.repo_id == repo_id,
                         ContentScanResult.path == old_path)
            q.update({"path": new_path})

        # Re-read the row paths only now: the bulk updates above may have
        # synchronised the loaded rows, and the original matched deletions
        # against the paths as they are at this point.
        current_paths = set(row.path for row in results)

        # deleted files, plus modified files: modified files are re-scanned
        # and re-recorded later, so their previous records are dropped now.
        paths_to_delete = []
        for file_group in dropped_files:
            for changed_file in file_group:
                if changed_file.path in current_paths:
                    paths_to_delete.append(changed_file.path)

        for path in paths_to_delete:
            q = edb_session.query(ContentScanResult)
            q = q.filter(ContentScanResult.repo_id == repo_id,
                         ContentScanResult.path == path)
            q.delete()

        edb_session.commit()

    def _scan_files(self, client, repo_id, version, files):
        """Scan eligible files and return ``[{"path": ..., "detail": ...}]`` for each finding."""
        scan_results = []
        for f in files:
            if not self.should_scan_file(f.path, f.size):
                continue
            # Load the object with the version that was detected for this
            # repo (previously hard-coded to 1, which ignored the version-0 fallback).
            seafile_obj = fs_mgr.load_seafile(repo_id, version, f.obj_id)
            content = seafile_obj.get_content()
            if not content:
                continue
            result = client.scan(content)
            if result and isinstance(result, dict):
                scan_results.append({"path": f.path, "detail": result})
            else:
                logging.warning('Failed to scan %s:%s', repo_id, f.path)
        return scan_results

    def put_task(self, repo_id, last_commit_id, new_commit_id):
        """Queue a ``ScanTask`` for one repo on the worker pool."""
        task = ScanTask(repo_id, last_commit_id, new_commit_id)
        self.thread_pool.put_task(task)

    def should_scan_file(self, fpath, fsize):
        """Return True when the file is within the size limit and has a configured suffix.

        Args:
            fpath: file path; only its extension (without the dot) is checked
                against ``appconfig.suffix_list``.
            fsize: file size, compared against ``appconfig.size_limit``.
        """
        if fsize > appconfig.size_limit:
            return False

        suffix = splitext(fpath)[1]
        return suffix[1:] in appconfig.suffix_list