def _schedule_processes(self, tasklist, _worker):
    """Run every task through ``_worker`` and return the CPU time spent.

    Args:
        tasklist: sequence of task objects. A shallow copy is consumed, so
            the caller's list is left untouched.
        _worker: callable invoked as ``_worker(task, task_queue)``; finished
            tasks are read back from ``task_queue``.

    Returns:
        The ``time.process_time()`` delta measured around the scheduling
        loop, or ``0`` when ``tasklist`` is empty.
    """
    # Reset the global flag that allows global _stop_all_processes
    _subprocess_container.stop_all = False

    # Make a shallow copy of the task list,
    # so we don't mess with the callers list.
    tasklist = copy.copy(tasklist)
    number_tasks = len(tasklist)
    if number_tasks == 0:
        return 0

    use_threading = number_tasks > 1 and self.num_processes > 1
    starttime = time.process_time()
    task_queue = Queue()
    pbar = _ProgressBar(number_tasks, self.silent)
    pbar.animate(0)
    processed_tasks = []
    n_errors = 0
    threads = []
    try:
        # run while there is still threads, tasks or stuff in the queue
        # to process
        while threads or tasklist or task_queue.qsize():
            # if we aren't using all the processors AND there is still
            # data left to compute, then spawn another thread
            if (len(threads) < self.num_processes) and tasklist:
                if use_threading:
                    t = Thread(
                        target=_worker,
                        args=(tasklist.pop(0), task_queue),
                    )
                    t.daemon = True
                    t.start()
                    threads.append(t)
                else:
                    _worker(tasklist.pop(0), task_queue)
            else:
                # In the case that we have the maximum number
                # of running threads or we run out tasks.
                # Keep only the threads that are still running. The list is
                # rebuilt rather than edited in place: removing entries while
                # iterating skips elements, and Thread.isAlive() no longer
                # exists on Python 3.9+.
                threads = [thread for thread in threads if thread.is_alive()]
                while task_queue.qsize():
                    task = task_queue.get()
                    if task.has_error():
                        n_errors += 1
                    self.summery.task_summery(task)
                    processed_tasks.append(task)
                    pbar.animate(len(processed_tasks), n_errors)
                time.sleep(0.01)
    except KeyboardInterrupt:
        _display("Processing interrupted")
        _subprocess_container.stop_all = True
        # Add a small delay here. It allows the user to press ctrl-c twice
        # to escape this try-catch. This is useful if the code is
        # run in an outer loop which we want to escape as well.
        time.sleep(1)

    return time.process_time() - starttime
def _cost_limited_dfs(self):
    """Search from ``self.start_state`` without expanding boards above ``self.cut_off``.

    Returns:
        (board, next_min): ``board`` is the first board taken from the
        frontier that has a single peg left, or None when the frontier runs
        empty. ``next_min`` is the smallest cost seen that exceeded the
        cut-off (``inf`` if none did).
    """
    frontier = Queue()
    frontier.put(self.start_state)
    next_min = inf

    while not frontier.empty():
        # Take the next board off the frontier
        current = frontier.get()
        cost = self._get_cost(current)

        if not cost <= self.cut_off:
            # Over the cut-off: do not expand, only remember the cheapest overshoot
            next_min = min(next_min, cost)
            continue

        if current.num_pegs == 1:
            return (current, next_min)

        for index, _ in enumerate(current.moves):
            successor = current.execute_move(index)
            self.num_visited += 1
            frontier.put(successor)
            # Track the largest frontier seen so far
            self.max_space = max(self.max_space, frontier.qsize())

    return (None, next_min)
class ProxyManager(object):
    """Keeps a queue of proxies taken from a scraper, skipping ones marked bad."""

    def __init__(self):
        self.is_alive = True
        self.proxies = Queue()
        self.scraper = Scraper()
        self.bad_proxies = BadProxies()

    def collect(self):
        """Refill the proxy queue whenever it is empty, polling every 0.5 s until stopped."""
        while self.is_alive:
            if self.proxies.qsize() == 0:
                usable = (
                    candidate
                    for candidate in self.scraper.proxies
                    if candidate not in self.bad_proxies
                )
                for candidate in usable:
                    self.proxies.put(candidate)
            sleep(0.5)

    def bad_proxy(self, proxy):
        """Record ``proxy`` as bad unless it is already recorded."""
        if proxy in self.bad_proxies:
            return
        self.bad_proxies.append(proxy)

    def get_proxy(self):
        """Return the next queued proxy, or None when the queue is empty."""
        if not self.proxies.qsize():
            return None
        return self.proxies.get()

    def start(self):
        """Run the collection loop in the calling thread."""
        self.collect()

    def stop(self):
        """Ask both the collection loop and the scraper to stop."""
        self.is_alive = False
        self.scraper.is_alive = False
class Master(threading.Thread):
    """Daemon thread that queues announced infohashes, fetches their metadata and logs them.

    Items on ``self.que`` are ``[nid, hex_infohash, name]`` lists, as put by
    :meth:`log`.
    """

    def __init__(self):
        super().__init__()
        # ``daemon`` attribute instead of the deprecated setDaemon()
        self.daemon = True
        self.que = Queue()
        self.conn = create_engine("postgresql://gaoyb7@localhost/dht_demo").connect()
        self.ins = hash_tab.insert()

    def log_in_database_demo(self, infohash, name):
        """Insert one (infohash, name) row; any database error is ignored."""
        try:
            self.conn.execute(self.ins, infohash=infohash, name=name)
        except Exception:
            # Best-effort insert: failures were silently ignored before and
            # still are, so a bad row cannot kill the logger thread.
            pass

    def logger(self):
        """Forever take queued items and store them, sleeping 1 s while the queue is empty."""
        while True:
            if self.que.empty():
                sleep(1)
                continue
            r = self.que.get()
            self.log_in_database_demo(r[1], r[2])

    def _spawn_fetch(self, r):
        """Start a daemon thread running ``fetch_metadata`` on one queued item."""
        t = threading.Thread(target=fetch_metadata, args=(r[0], r[1], r[2]))
        t.daemon = True
        t.start()

    def run(self):
        """Start the logger thread, then keep spawning fetch threads (below 1500 live threads)."""
        dt = threading.Thread(target=self.logger)
        dt.daemon = True
        dt.start()
        # NOTE(review): logger() and this loop both consume self.que, so each
        # queued item reaches only one of them -- confirm this is intended.
        while True:
            # active_count() replaces the deprecated activeCount()
            if threading.active_count() < 1500:
                if self.que.qsize() == 0:
                    sleep(1)
                    continue
                self._spawn_fetch(self.que.get())
            else:
                sleep(1)

    def fetch(self):
        """Make up to 100 attempts to spawn a fetch thread, sleeping 1 s on an empty queue."""
        for _ in range(100):
            if self.que.qsize() == 0:
                sleep(1)
                continue
            self._spawn_fetch(self.que.get())

    def log(self, nid, infohash, name, address):
        """Queue an announced torrent; pauses 1 s first when more than 5000 items are waiting.

        ``infohash`` is stored hex-encoded and ``name`` decoded to text.
        ``address`` is accepted but not used.
        """
        if self.que.qsize() > 5000:
            sleep(1)
        self.que.put([nid, codecs.encode(infohash, "hex_codec").decode(), name.decode()])
class Fetcher:
    """Small thread pool that opens queued requests and queues ``(request, body)`` answers."""

    def __init__(self,threads,subject):
        """Create the queues and start ``threads`` daemon workers.

        ``subject`` is stored and handed to every worker as its single
        argument.
        """
        self.opener = urllib.request.build_opener(urllib.request.HTTPHandler)
        self.lock = Lock()
        self.q_req = Queue()
        self.q_ans = Queue()
        self.threads = threads
        self.subject = subject
        # Must exist before any worker starts: workers increment it as soon
        # as they pick up a request.
        self.running = 0
        for _ in range(threads):
            # args has to be a tuple; passing ``subject`` itself would be
            # unpacked element by element into threadget().
            t = Thread(target=self.threadget, args=(subject,))
            t.daemon = True
            t.start()

    def __del__(self):
        """Wait for both queues to be fully processed before the object goes away."""
        time.sleep(0.5)
        self.q_req.join()
        self.q_ans.join()

    def taskleft(self):
        """Return queued requests + queued answers + requests currently being fetched."""
        return self.q_req.qsize()+self.q_ans.qsize()+self.running

    def push(self, req):
        """Queue a request for the workers."""
        self.q_req.put(req)

    def pop(self, ans):
        """Block until an answer is available and return it (``ans`` is unused)."""
        result = self.q_ans.get()
        # Mark the answer as consumed so q_ans.join() in __del__ can finish.
        self.q_ans.task_done()
        return result

    def download_imag(self, subject):
        """Download ``subject['cover']`` to a JPEG named after ``subject['title']``.

        Increments and prints the module-level ``count``; returns ``'ok'``.
        """
        global count
        s = requests.session()
        imag = s.get(subject['cover'])
        name = subject['title']
        path = '/users/peibibing/PycharmProjects/douban/douban_movie/%s.jpg'%name
        with open(path,'wb') as f:
            f.write(imag.content)
        count += 1
        print(count)
        return 'ok'

    def threadget(self,sub):
        """Worker loop: fetch each queued request and queue ``(req, body)``.

        On any fetch error the body is the string ``'error'``.
        """
        while True:
            req = self.q_req.get()
            with self.lock:  # keep the counter update atomic
                self.running += 1
            try:
                # ans = download_imag(sub)
                ans = self.opener.open(req).read()
            except Exception:
                ans = 'error'
                print(ans)
            self.q_ans.put((req,ans))
            with self.lock:
                self.running -= 1
            self.q_req.task_done()
            time.sleep(0.1)
class PingThem():
    """Ping a list of targets with a pool of worker threads and collect the results."""

    def __init__(self, targets, maxthreads=100):
        """Queue every target; the pool size is capped at ``len(targets)``."""
        self.q1 = Queue(maxsize=0)  # targets waiting to be pinged
        self.q2 = Queue(maxsize=0)  # ping results
        self.maxthreads = maxthreads if len(targets) >= maxthreads else len(targets)
        for target in targets:
            self.q1.put(target)
        logging.info("Done adding all targets")
        print(self.q1.qsize())

    def worker(self):
        """Ping targets from ``q1`` until a ``None`` sentinel arrives."""
        while 1:
            i = self.q1.get()
            # quit cond
            if i is None:
                break
            try:
                p = PingIt()
                r = p.doping(i)
                self.q2.put(r)
            except Exception:
                # A failing target must not kill the worker or leave the task
                # unacknowledged; otherwise q1.join() in run() never returns.
                logging.exception("Ping failed for %s", i)
            finally:
                self.q1.task_done()

    def run(self):
        """Start the workers, wait for all targets, and return the list of results.

        Targets whose ping raised are logged and contribute no result.
        """
        print("Will start {0} threads for checking ...".format(self.maxthreads))
        allts = []
        for _ in range(self.maxthreads):
            t = Thread(target=self.worker)
            t.start()
            allts.append(t)
        self.q1.join()
        # One sentinel per worker so every thread leaves its loop
        for _ in range(self.maxthreads):
            self.q1.put(None)
        for t in allts:
            t.join()
        # check q2
        logging.info(self.q2.qsize())
        ret = []
        for _ in range(self.q2.qsize()):
            i = self.q2.get()
            if i is None:
                break
            ret.append(i)
        return ret
def downloads(urls, outputs=[], concurrency=cpu_count()):
    """Download ``urls`` with a pool of ``Worker`` threads and return the results.

    Args:
        urls: iterable of URLs.
        outputs: output targets paired with ``urls`` by position; when empty,
            every URL is paired with ``None``. (The default list is never
            mutated.)
        concurrency: maximum number of worker threads; capped at the number
            of jobs.

    Returns:
        A list holding one ``Result(False, "task not excute", url)`` per job
        that was never picked up, followed by whatever the workers put on the
        result queue.
    """
    import time

    # Queues used for thread synchronisation
    exit_queue = Queue(1)
    job_queue = Queue()
    result_queue = Queue()

    # Materialise first: a generator would otherwise be exhausted while the
    # default outputs are built, leaving nothing for zip() below.
    urls = list(urls)

    # Create the download jobs and add them to the job queue
    outputs = [None for _ in urls] if not outputs else outputs
    for url, output in zip(urls, outputs):
        job_queue.put(Param(url, output))
    job_size = job_queue.qsize()

    # Create and start the worker threads
    works = []
    concurrency = job_size if concurrency > job_size else concurrency
    for _ in range(concurrency):
        t = Worker(job_queue, result_queue, exit_queue)
        works.append(t)
        t.start()

    # Wait for completion. Two cases:
    # 1. every job has been executed
    # 2. the user pressed ctrl+c; jobs already running are allowed to finish
    alive = True
    try:
        while alive:
            # is_alive(): Thread.isAlive() no longer exists on Python 3.9+
            alive = any(work.is_alive() for work in works)
            if result_queue.qsize() == job_size and exit_queue.qsize() == 0:
                exit_queue.put(1)
            if alive:
                # Short pause so the poll does not spin at 100% CPU
                time.sleep(0.01)
    except KeyboardInterrupt:
        logger.warning("ctrl + c is precessed!wait running task to complate..")
        # exit_queue holds at most one item; an unconditional put() would
        # block forever if the exit signal is already queued.
        if exit_queue.qsize() == 0:
            exit_queue.put(1)

    for work in works:
        if work.is_alive():
            work.join()

    # Collect and return the results
    results = []
    while job_queue.qsize() > 0:
        param = job_queue.get_nowait()
        results.append(Result(False, "task not excute", param.url))
    while result_queue.qsize() > 0:
        results.append(result_queue.get_nowait())
    return results
class Fetcher:
    """Small thread pool that downloads queued requests and queues the response bodies."""

    def __init__(self, threads_num):
        """Create the queues and start ``threads_num`` daemon workers."""
        self.opener = urllib.request.build_opener(urllib.request.HTTPHandler)
        self.lock = Lock()  # guards self.running
        self.q_req = Queue()  # task queue
        self.q_ans = Queue()  # result queue
        self.threads_num = threads_num
        # Must exist before any worker starts: workers increment it as soon
        # as they pick up a task.
        self.running = 0
        for _ in range(threads_num):
            t = Thread(target=self.deal_task)
            t.daemon = True
            t.start()

    def __del__(self):
        """On destruction, wait until both queues have been fully processed."""
        time.sleep(0.5)
        self.q_req.join()
        self.q_ans.join()

    def task_left(self):
        """Return queued tasks + queued results + tasks currently being fetched."""
        return self.q_req.qsize() + self.q_ans.qsize() + self.running

    def push(self, task):
        """Queue a request for the workers."""
        self.q_req.put(task)

    def pop(self):
        """Block until a result is available and return it."""
        ans = self.q_ans.get()
        # Mark the result as consumed so q_ans.join() in __del__ can finish.
        self.q_ans.task_done()
        return ans

    def deal_task(self):
        """Worker loop: fetch each queued request and queue its body."""
        while True:
            req = self.q_req.get()
            with self.lock:  # keep the counter update atomic
                self.running += 1
            ans = self.get_data(req)
            self.q_ans.put(ans)
            with self.lock:
                self.running -= 1
            self.q_req.task_done()
            time.sleep(0.1)

    def get_data(self, req, retries=3):
        """Fetch ``req`` with a 10 s timeout, retrying up to ``retries`` times on URLError.

        Returns:
            The response body, or ``''`` once every attempt has failed (the
            failure and its reason are printed).
        """
        attempts_left = retries
        while True:
            try:
                return self.opener.open(req, timeout=10).read()
            except urllib.request.URLError as e:
                if attempts_left > 0:
                    attempts_left -= 1
                    continue
                print('GET Failed.', req)
                print(e.reason)
                return ''
def train(self, texts, chunksize=100, workers = 2):
    """
    Update the model's neural weights from a sequence of sentences (can be a
    once-only generator stream). Each sentence must be a list of utf8 strings.

    ``texts`` yields ``(sentence, object_index, softmax_target, sigmoid_target)``
    tuples. They are grouped into jobs of ``chunksize`` and trained on by
    ``workers`` threads.

    Returns ``(objects_done, total_error)``.
    """
    if not training_methods_imported:
        raise NotImplementedError(err_msg)

    logger.info("training model with %i workers", workers)
    start = time.time()
    next_report = [1.0]
    # buffer ahead only a limited number of jobs.. this is the reason we
    # can't simply use ThreadPool :(
    jobs = Queue(maxsize=2 * workers)
    # for shared state (=number of objects trained so far, log reports...)
    lock = threading.Lock()
    total_error = [0.0]
    objects_done = [0]

    def worker_train():
        """Train the model, lifting lists of sentences from the jobs queue."""
        input_width = self.window * self.size + self.object_size
        observation_work = np.zeros(input_width, dtype = REAL)
        prediction_work = np.zeros(self.output_size, dtype = REAL)
        if self.bilinear_form:
            composition_work = np.zeros([max(self.output_size, input_width), input_width], dtype = REAL)
        else:
            composition_work = None

        while True:
            job = jobs.get()
            if job is None:
                # data finished, exit
                return
            # accumulate the training error over every item of the job
            error = 0
            for sentence, object_index, softmax_target, sigmoid_target in job:
                error = error + train_sentence_concatenation(
                    self, sentence, object_index, softmax_target, sigmoid_target,
                    self._alpha, prediction_work, observation_work, composition_work)
            with lock:
                total_error[0] += error
                objects_done[0] += len(job)
                elapsed = time.time() - start
                if elapsed >= next_report[0]:
                    rate = float(objects_done[0]) / elapsed if elapsed else 0.0
                    logger.info("PROGRESS: %s objects, %.0f objects/s", objects_done[0], rate)
                    # don't flood the log, wait at least a second between progress reports
                    next_report[0] = elapsed + 1.0

    dynos = []
    for _ in range(0, workers):
        dynos.append(threading.Thread(target=worker_train))
    for thread in dynos:
        thread.daemon = True  # make interrupting the process with ctrl+c easier
        thread.start()

    # convert each sentence to an array of vocabulary indices, and start
    # filling the jobs queue
    no_oov = (
        (np.array([self.vocab.get_index(word) for word in sentence], dtype = INT),
         object_index, softmax_target, sigmoid_target)
        for sentence, object_index, softmax_target, sigmoid_target in texts
    )
    for job_no, job in enumerate(grouper(no_oov, chunksize)):
        logger.debug("putting job #%i in the queue, qsize=%i", job_no, jobs.qsize())
        jobs.put(job)
    logger.info("reached the end of input; waiting to finish %i outstanding jobs", jobs.qsize())

    # give the workers heads up that they can finish -- no more work!
    for _ in range(0, workers):
        jobs.put(None)
    for thread in dynos:
        thread.join()

    elapsed = time.time() - start
    final_rate = objects_done[0] / elapsed if elapsed else 0.0
    logger.info("training on %i objects took %.1fs, %.0f words/s", objects_done[0], elapsed, final_rate)
    return (objects_done[0], total_error[0])
class EntityQueue:
    """Bounded queue that also tracks per-producer "still enqueuing" flags."""

    def __init__(self, maxsize = 1000):
        self.queue = Queue(maxsize)
        self.enqueuing_flags = {}

    def put(self, item, block = True, timeout = None):
        """Add ``item`` to the underlying queue."""
        self.queue.put(item, block, timeout=timeout)

    def get(self, block = True, timeout = None):
        """Remove and return an item from the underlying queue."""
        return self.queue.get(block, timeout)

    def qsize(self):
        """Return the size of the underlying queue."""
        return self.queue.qsize()

    def empty(self):
        """Return True only when the queue is empty and ``is_enqueuing()`` is false."""
        if not self.queue.empty():
            return False
        return not self.is_enqueuing()

    def full(self):
        """Return True when the underlying queue is full."""
        return self.queue.full()

    def add_enqueuing_flag(self, id):
        """Register producer ``id`` as currently enqueuing."""
        self.update_enqueuing_flag(id, True)

    def update_enqueuing_flag(self, id, state):
        """Set the enqueuing flag of producer ``id`` to ``state``."""
        self.enqueuing_flags[id] = state

    def is_enqueuing(self):
        """Return True when no flags exist, otherwise the ``and`` of all flags.

        NOTE(review): this is true only while *every* registered producer is
        still enqueuing -- confirm that "any producer" was not intended.
        """
        combined = True
        for flag in self.enqueuing_flags.values():
            if not combined:
                break
            combined = flag
        return combined
class WFOpenedClosedLists(SwapPolitic):
    """Searches tables reachable by adjacent swaps using an opened queue and a closed list."""

    def __init__(self, table):
        self.max_depth = 3
        self.table = table
        self.tries = 0
        self.openedList = Queue()
        self.closedList = []
        self.openedList.put((table, 0))

    def run(self):
        """Expand queued tables until a valid one is found or the opened list is empty.

        Every dequeued table is printed. Tables at ``max_depth`` or deeper are
        not expanded. On success ``self.table`` is set to the valid table.
        """
        while not self.openedList.qsize() == 0:
            current, level = self.openedList.get()
            current.printState()
            if level < self.max_depth:
                self.tries += 1
                if current.isValid():
                    self.table = current
                    return
                # Queue one successor per adjacent pair
                for position in range(current.count - 1):
                    successor = current.copy()
                    successor.swapTwoAdjacent(position)
                    self.openedList.put((successor, level + 1))
                self.closedList.append((current, level))
def start_testing(unique_info, test_continues):
    """Multithreaded testing of files fetched from database.

    Keeps up to TESTING_THREADS_ALLOWED ``tester.tester`` daemon threads
    running until every file is reported tested or the database has not been
    updated for DB_WATCHDOG_TIME seconds. It then waits on the worker queue
    and acknowledges one item of ``test_continues``.
    """
    worker_threads = Queue(TESTING_THREADS_ALLOWED)
    print("starting ", TESTING_THREADS_ALLOWED, " threads\n")
    next_thread_id = 0
    all_files_tested = False

    # Spawn threads to fetch, test files and update database until all files
    # uploaded to DB and tested or no changes happened to DB for
    # DB_WATCHDOG_TIME seconds.
    while True:
        since_update = time.time() - db.time_of_update[0]
        if not since_update < DB_WATCHDOG_TIME or all_files_tested:
            break
        print(time.time() - db.time_of_update[0])

        if worker_threads.qsize() < TESTING_THREADS_ALLOWED:
            # The queue doubles as a counter of started workers
            worker_threads.put(next_thread_id)
            worker_args = (worker_threads, conn_data, unique_info,
                           EXTENTION_TO_FIND, ALLOWED_APP_RUNTIME)
            worker = threading.Thread(target=tester.tester, args=worker_args)
            worker.setDaemon(True)
            worker.start()
            next_thread_id += 1

        time.sleep(0.01)
        if test_continues.qsize() < 2:  # tree composed and uploaded
            all_files_tested = db.check_test_completion(conn_data, EXTENTION_TO_FIND)

    print ("Testing thread waiting for all worker-threads to complete\n")
    worker_threads.join()
    print ("Testing Thread Checked all unique ",EXTENTION_TO_FIND, " files\n")
    test_continues.get()
    test_continues.task_done()
def test():
    """Run ``spider`` over a fixed set of cell codes, re-queueing a code after a 403 result."""
    cell_codes = [
        {"cid": 5465, "mcc": 460, "lac": 2, "mnc": 0},
        {"cid": 4198, "mcc": 460, "lac": 12, "mnc": 0},
        {"cid": 12175, "mcc": 460, "lac": 12, "mnc": 0},
        {"cid": 10, "mcc": 460, "lac": 17, "mnc": 0},
        {"cid": 8, "mcc": 460, "lac": 18, "mnc": 0},
        {"cid": 108186371, "mcc": 460, "lac": 23, "mnc": 0},
    ]
    pending = Queue()
    for entry in cell_codes:
        pending.put(entry)

    while pending.qsize() > 0:
        current_code = pending.get()
        print('>>>>>>current_code={}'.format(current_code))
        outcome = spider(current_code)
        if isinstance(outcome, dict):
            print(outcome)
            continue
        if outcome == 403 and outcome in ERR_CODES.keys():
            # Put the code back, then wait before carrying on
            pending.put(current_code)
            print('sleep {}s'.format(_sleep_time))
            time.sleep(_sleep_time)
class Channel:
    """Wraps a consumer and puts a local queue in front of it."""

    def __init__(self, name: str, consumer: BrightsideConsumer) -> None:
        self._consumer = consumer
        self._name = ChannelName(name)
        self._queue = Queue()
        self._state = ChannelState.initialized

    def acknowledge(self, message: BrightsideMessage):
        """Forward the acknowledgement of ``message`` to the consumer."""
        self._consumer.acknowledge(message)

    @property
    def length(self) -> int:
        """Number of messages waiting in the local queue."""
        return self._queue.qsize()

    def name(self) -> ChannelName:
        """Return the channel name."""
        return self._name

    def receive(self, timeout: int) -> BrightsideMessage:
        """Return a locally queued message if there is one, else ask the consumer.

        The first call moves the channel from ``initialized`` to ``started``.
        """
        if self._state is ChannelState.initialized:
            self._state = ChannelState.started

        if self._queue.empty():
            return self._consumer.receive(timeout=timeout)
        return self._queue.get(block=True, timeout=timeout)

    @property
    def state(self) -> ChannelState:
        """Current channel state."""
        return self._state

    def stop(self):
        """Queue a quit message locally and mark the channel as stopping."""
        quit_message = BrightsideMessageFactory.create_quit_message()
        self._queue.put(quit_message)
        self._state = ChannelState.stopping
def loop(output, input, translator):
    """Drive ``output`` from actions delivered by ``input`` until a "halt" is translated.

    Each pass drains the input queue, applies the translated calls to
    ``output``, then calls ``output.update`` with the seconds elapsed since
    the previous pass. On "halt" every translator pin is switched off and,
    once the pass completes, ``output`` is closed.
    """
    pending = Queue()
    input.input_queue = pending
    input.start()

    previous = datetime.now()
    keep_going = True
    while keep_going:
        current = datetime.now()
        elapsed = (current - previous).total_seconds()
        previous = current

        while pending.qsize():
            action = Action(*pending.get())
            fn, args = translator.do(action)
            if fn != "halt":
                # Dispatch by method name on the output object
                getattr(output, fn)(*args)
                continue
            keep_going = False
            for pin in translator.pins:
                output.off(pin)

        output.update(elapsed)

    output.close()
def crawler(start_link = START_LINK , max_crawled = MAX_CRAWLED):
    """Crawl pages starting at ``start_link`` and dump the ranked result as JSON.

    Args:
        start_link: first link to fetch.
        max_crawled: maximum number of pages to parse successfully.

    Returns:
        The value returned by ``updateRanks`` for ``{'content': [...]}``, where
        each entry is a ``pageParser`` result whose ``'links'`` have been
        prefixed with ``LINK_PREFIX``. The same value is written as JSON to
        ``configs.JSON_DIR``.
    """
    # A set gives O(1) "already visited" checks (assumes links are hashable
    # strings -- TODO confirm).
    links_viewed = set()
    result = {'content': []}
    linkqueue = Queue(0)
    linkqueue.put(start_link)
    counter = 0
    while counter < max_crawled and linkqueue.qsize() > 0:
        newLink = linkqueue.get()
        if newLink in links_viewed:
            continue
        links_viewed.add(newLink)
        try:
            new_result = pageParser(newLink)
        except Exception:
            # Best effort: a page that fails to parse is skipped
            print ("some error happend during fetching a link here")
            continue
        result['content'].append(new_result)
        counter = counter + 1
        # Rewrite the page's links in place to absolute form and queue them
        for i, link in enumerate(new_result['links']):
            absolute = LINK_PREFIX + link
            linkqueue.put(absolute)
            new_result['links'][i] = absolute
    result = updateRanks(result)
    print (" finished fetching ",counter,"pages.")
    variable = json.dumps(result)
    # Context manager so the file is flushed and closed even on error
    with open(configs.JSON_DIR , 'w') as out:
        out.write(variable)
    return result
def execute_jobs(jobs, show_progress=False, number_of_workers=10, debug_jobs=False):
    """Run ``jobs`` on a pool of ``Worker`` threads and return their results.

    Each job is a dict with a ``'client'`` entry exposing ``path``. Workers
    deliver ``(job, result)`` pairs; every result dict is updated with its
    job's entries before being collected. With ``show_progress`` (and more
    than one job) a character per finished job is written to stdout:
    ``s`` for a NotImplemented return code, ``E`` for a truthy one, ``.``
    otherwise.
    """
    if debug_jobs:
        logger.setLevel(logging.DEBUG)

    results = []
    job_queue = Queue()
    result_queue = Queue()

    # create worker threads
    pool_size = min(number_of_workers, len(jobs))
    workers = [Worker(job_queue, result_queue) for _ in range(pool_size)]

    pending_jobs = list(jobs)
    running_job_paths = []

    def _dispatch_next():
        # Hand the next pending job to the workers and record it as running
        job = pending_jobs.pop(0)
        path = job['client'].path
        running_job_paths.append(path)
        logger.debug("started '%s'" % path)
        job_queue.put(job)

    # fill job_queue with jobs for each worker
    while job_queue.qsize() < len(workers):
        _dispatch_next()
    logger.debug('ongoing %s' % running_job_paths)

    # start all workers
    for w in workers:
        w.start()

    report = show_progress and len(jobs) > 1

    # collect results
    while len(results) < len(jobs):
        job, result = result_queue.get()
        logger.debug("finished '%s'" % job['client'].path)
        running_job_paths.remove(result['job']['client'].path)
        if report:
            returncode = result['returncode']
            if returncode == NotImplemented:
                mark = 's'
            elif returncode:
                mark = 'E'
            else:
                mark = '.'
            sys.stdout.write(mark)
            if debug_jobs:
                sys.stdout.write('\n')
            sys.stdout.flush()
        result.update(job)
        results.append(result)
        if pending_jobs:
            _dispatch_next()
        if running_job_paths:
            logger.debug('ongoing %s' % running_job_paths)

    if report and not debug_jobs:
        print('')  # finish progress line

    # join all workers
    for w in workers:
        w.done = True
    for w in workers:
        w.join()
    return results
def checksum_dir(src, local_manifest, num_threads=10):
    """
    Queue the HDFS files under ``src`` whose hashes are not yet in the local
    manifest, hand them to worker threads, and append the results to the
    manifest.

    Args:
        src: HDFS path passed to ``client.walk``.
        local_manifest: path of a local text file with one
            ``<sha512> *<path>`` entry per line. It is read first and
            appended to at the end.
        num_threads: number of ``generate_checksums`` worker threads to start.

    Returns:
        None.

    Raises:
        Exception: if the checksum reported by HDFS for a file is not 64
            bytes long or is all zero bytes.
    """
    # Load the hashes already recorded, keyed by path
    known = {}
    logger.info("Opening up %s" % local_manifest)
    with open(local_manifest,'r') as f:
        for line in f:
            (sha512,path) = line.split(' ',1)
            # Drop the '*' marker in front of the path and the trailing newline
            path = path.lstrip('*')
            path = path.rstrip()
            known[path] = sha512
    logger.info("Loaded %i known hashes." % len(known))
    client = hdfs.InsecureClient('http://hdfs.gtw.wa.bl.uk:14000', user='******')
    #client = hdfs.InsecureClient('http://dls.httpfs.wa.bl.uk:14000', user='******')
    wq = Queue()  # paths waiting to be checksummed
    print("Scanning %s" % src)
    sames = 0  # unused
    misses = 0  # unused
    for (path, dirs, files) in client.walk(src):
        # Loop through the files:
        i = 0  # unused
        for file in files:
            srcpath = posixpath.join(path,file)
            if srcpath in known:
                logger.info("Hash for %s is already known." % srcpath)
                continue
            srcstatus = client.status(srcpath)  # result is not used
            srchash = client.checksum(srcpath)
            # Sanity-check the checksum HDFS reports before queueing the file
            if len(srchash['bytes']) != 64 or srchash['bytes'] == bytearray(64):
                raise Exception("Got nonsense hash %s" % srchash)
            logger.info("Queueing %s" %(srcpath))
            wq.put(srcpath)
        # NOTE(review): the nesting of this break could not be recovered from
        # the collapsed source; as placed here only the first directory
        # yielded by walk() is scanned -- confirm against the original file.
        break
    logger.info("Launching %i workers to process %i files..." % (num_threads, wq.qsize()) )
    results = []  # presumably filled by generate_checksums -- verify
    for i in range(num_threads):
        worker = Thread(target=generate_checksums, args=(wq, results))
        worker.setDaemon(True)
        worker.start()
    # Wait for the queue to be processed:
    wq.join()
    logger.info("Appending results to %s" % local_manifest)
    for r in results:
        # Results without a hash are skipped
        if r.hash:
            with open(local_manifest, 'a') as f:
                print("%s *%s" % (r.hash, r.path), file=f)
def process_multi_threaded(self, process_queue):
    """
    Starts the multithreaded process of retrieving search data from the system.

    One thread running ``perform_search(item, result_queue)`` is started per
    item currently in ``process_queue``. All threads are joined and the
    result queue is returned.
    """
    result_queue = Queue()

    # Start one search thread per queued item
    threads = []
    pending = process_queue.qsize()
    while pending > 0:
        item = process_queue.get()
        searcher = Thread(target=perform_search, args=(item, result_queue))
        threads.append(searcher)
        searcher.start()
        pending -= 1

    # Wait for every search to finish
    for searcher in threads:
        searcher.join()

    print("All processing threads finished")
    print(result_queue.qsize())
    return result_queue
class Kernel(Thread):
    """Thread that loads queued programs through the program loader."""

    def __init__(self):
        Thread.__init__(self)
        self.programsQueue = Queue()
        self.isFirstLoad = True
        self.shouldShutDown= False

    def initializeKernel(self, clock, programloader, scheduler):
        """Attach the clock, program loader and scheduler the kernel works with."""
        self.programLoader = programloader
        self.scheduler = scheduler
        self.clock = clock

    def load(self, program):
        """Queue a program that the program loader will load to the memory."""
        self.programsQueue.put_nowait(program)

    def run(self):
        """Poll the program queue until ``shouldShutDown`` is set.

        Each queued program is passed to the program loader. If the PCB table
        was empty before that load, the scheduler is asked to put the next
        PCB on the CPU.
        """
        Thread.run(self)
        while not self.shouldShutDown:
            if self.programsQueue.qsize() == 0:
                continue
            program = self.programsQueue.get_nowait()
            pcb_count = len(self.programLoader.pcbTable.pcbs)
            self.isFirstLoad = pcb_count == 0
            self.programLoader.load(program)
            if self.isFirstLoad:
                self.scheduler.setNextPcbToCpu()
def search_multithreaded(self, query_text, limit=0):
    """
    Performs the search in multithreaded manner.
    Searching for 1-100, then 101-200, 201-300, etc...

    ``limit`` is the number of threads to start; ``0`` means
    ``int(self._threads)``. Returns the queue the search threads were given
    for their results.
    """
    result_queue = Queue()

    # Fall back to the configured thread count
    thread_count = int(self._threads) if limit == 0 else limit

    # Thread number k searches from offset k*100 + 1
    threads = []
    for offset in range(1, thread_count * 100 + 1, 100):
        searcher = Thread(target=create_and_perform_query,
                          args=(query_text, result_queue, offset))
        threads.append(searcher)
        searcher.start()

    # Wait until every search has finished
    for searcher in threads:
        searcher.join()

    print("All search threads finished")
    print(result_queue.qsize())
    return result_queue
def main():
    """Collect links from 44 pages, fetch every entry, and dump the entries to data.json.

    Prints the time spent in each phase and the number of queued entries.
    """
    queue = Queue()
    pages = (URL + str(number) for number in range(1, 45))

    links_started = time()
    with Pool(10) as link_pool:
        for links in link_pool.imap_unordered(get_links, pages):
            for link in links:
                queue.put(link)
    links_finished = time()

    # NOTE(review): entries are put on the same queue that drain() is still
    # reading from -- confirm drain() cannot hand an entry back as a link.
    with Pool(20) as entry_pool:
        for entry in entry_pool.imap_unordered(get_entry, drain(queue)):
            queue.put(entry)
    entries_finished = time()

    print()
    print(links_finished - links_started)
    print('entries:', queue.qsize())
    print(entries_finished - links_finished)

    with open('data.json', 'w') as f:
        json.dump(queue2list(queue), f)
class BlockingShellSocketChannel(ShellSocketChannel):
    """Shell channel that stores incoming messages in a queue for later retrieval."""

    def __init__(self, context, session, address=None):
        super(BlockingShellSocketChannel, self).__init__(context, session,
                                                         address)
        self._in_queue = Queue()

    def call_handlers(self, msg):
        """Store an incoming message on the internal queue."""
        #io.rprint('[[Shell]]', msg) # dbg
        self._in_queue.put(msg)

    def msg_ready(self):
        """Is there a message that has been received?"""
        return self._in_queue.qsize() != 0

    def get_msg(self, block=True, timeout=None):
        """Get a message if there is one that is ready."""
        return self._in_queue.get(block, timeout)

    def get_msgs(self):
        """Get all messages that are currently ready."""
        ready = []
        try:
            # Non-blocking reads until the queue reports Empty
            while True:
                ready.append(self.get_msg(block=False))
        except Empty:
            pass
        return ready
class TrapUnitContainer(FUnitContainer):
    """Functional-unit container for trap instructions; traps are handled in issue order."""

    def __init__(self, configuration, machine):
        super().__init__(configuration, machine)
        self.funits = [TrapUnit(machine) for i in range(self.numUnits)]
        self.trapQueue = Queue()

    def issue(self, instr):
        """Try to issue ``instr`` to an open reservation station.

        Returns False when the opcode is not handled here or no station is
        open, True once the instruction has been issued and queued. A
        function code of 0 flags a halt on the machine.
        """
        handled_here = instr.strOpcode in self.instructions
        if not handled_here:
            return False

        if instr.funCode == 0:
            self.machine.haltIssued = True
            log("Halt issued.")

        if self.hasOpenRStation():
            station = self.getOpenRStation()
            station.issue(instr)
            self.trapQueue.put(station)
            return True
        return False

    def execute(self):
        """Advance the oldest queued trap by one step: begin, continue or complete."""
        if self.trapQueue.qsize() == 0:
            return

        # Look at the head of the queue without removing it
        head = self.trapQueue.queue[0]

        if head.readyToExecute():
            log('{0} beginning execution.'.format(head.name))
            head.beginExecution()
            return

        if head.executing and head.execTime > 0:
            head.decreaseExecTime()
            log('{0} continuing execution. Time left: {1}'.format(head.name, head.execTime))
            return

        if head.execTime == 0 and not head.resultReady:
            log('{0} completing execution.'.format(head.name))
            head.computeResult()
            # The finished trap leaves the queue
            self.trapQueue.get()
def monitor(self, core, mem):
    """Watch the device for ``self.monitorTime`` seconds at the given clocks.

    The GPU is polled every 5 seconds. Reaching ``self.maxTemp`` or seeing the
    hardware-error count rise above ``self.HWE`` reports bad clocks and
    returns True. Otherwise the average of the last (at most two) retained
    MH readings is recorded as a successful result and False is returned.
    """
    began = time.time()
    recent_mh = []  # rolling window of the most recent MH readings

    while (time.time() - began) < self.monitorTime:
        time.sleep(5)
        devInfo = self.api.getGPUInfo(self.device)

        if devInfo['Temperature'] >= self.maxTemp:
            self.handleBadClocks('Temperature threshold reached.', devInfo)
            return True

        if devInfo['HWE'] > self.HWE:
            self.HWE = devInfo['HWE']
            self.handleBadClocks('Hardware errors found.', devInfo)
            #Make sure we give the GPU time to set the new clocks so we get the final HW error count
            time.sleep(2)
            devInfo = self.api.getGPUInfo(self.device)
            self.HWE = devInfo['HWE']
            return True

        recent_mh.append(devInfo['MH'])
        if len(recent_mh) >= 3:
            # Drop the oldest reading
            recent_mh.pop(0)

    #MH added should be averaged
    avg = sum(recent_mh) / len(recent_mh)
    newrec = {
        'device': self.device,
        'core': core,
        'mem': mem,
        'success': True,
        'MH': avg,
        'temp': devInfo['Temperature'],
    }
    self.results.append(newrec)
    self.logger.addRecord(newrec)
    return False
def using_queue(values):
    """Combine ``values`` into one nested pair by repeatedly pairing the two oldest items.

    The two items at the front of a FIFO queue are replaced by the tuple
    ``(first, second)``, which goes to the back, until one item remains.

    Args:
        values: iterable of items; must contain at least one.

    Returns:
        The single remaining item (the item itself for one-element input).

    Raises:
        ValueError: if ``values`` is empty. Previously this case blocked
            forever on ``queue.get()``.
    """
    queue = Queue()
    for x in values:
        queue.put(x)
    if queue.empty():
        raise ValueError("using_queue() requires at least one value")
    while queue.qsize() > 1:
        # Tuple elements are evaluated left to right, so the older item comes first
        pair = (queue.get(), queue.get())
        queue.put(pair)
    return queue.get()
class Open163Parser(object):
    """Get course video URLs from www.open.163.com"""

    def __init__(self, courseURL):
        self.courseURL = courseURL
        self.resQ = Queue()  # results put by putResult()

    def _parseSrtURL(self, srtURL):
        """
        Take a matched bilingual-subtitle URL and extract the subtitle URLs
        carried in its query string.
        Not needed when only the bilingual subtitle itself is wanted.

        Returns the parsed query as a dict, plus the original URL under
        ``"cn-en"``.
        """
        encodedQuery = urlparse.urlsplit(srtURL).query
        # note the title of srt is encoded in GB2312.
        srtInfo = urlparse.parse_qs(encodedQuery, encoding='gb2312')
        srtInfo["cn-en"] = srtURL
        return srtInfo

    def getLectureURLs(self, courseURL):
        """
        Extract the URL of every lecture from the course home page, for use
        with the Flvcd API.

        Returns the list of lecture URLs, or None (after printing a message)
        when the page does not answer with status 200.
        """
        res = requests.get(courseURL)
        if res.status_code != 200:
            print("Unable to find course page.")
            return None
        doc = Soup(res.text, "html.parser")
        lectures = doc.find(id="list2").find_all("td", "u-ctitle")
        lectURLs = [lect.a["href"] for lect in lectures]
        print("Totally ", len(lectURLs), "lectures.")
        return lectURLs

    # multithreading methods below:
    def fillQ(self, queue):
        """
        fill the queue for multithreading.

        Nothing is queued when the course page could not be fetched.
        """
        # getLectureURLs() returns None on failure; iterating it used to
        # raise TypeError.
        lectURLs = self.getLectureURLs(self.courseURL) or []
        for url in lectURLs:
            queue.put(url)

    def putResult(self, videoInfo):
        """ put videoInfo into result queue. """
        self.resQ.put(videoInfo)

    def getResult(self):
        """ return results as a tuple, emptying the result queue. """
        videoList = []
        while self.resQ.qsize() > 0:
            videoList.append(self.resQ.get())
        return tuple(videoList)
def test_run(self):
    """
    Test drain's run function (in a same thread): draining ``range(5)``
    must leave five items in the destination queue.
    """
    sink = Queue()
    Drain(range(5), sink).run()
    assert sink.qsize() == 5
def take_action(self, parsed_args):
    """Publish the current directory to the configured container and enable static hosting.

    Files ending in ``.scss`` are skipped, as are files whose MD5 matches the
    hash of the remote object of the same name. Everything else is queued for
    eight upload worker threads. Once the queue has been processed the
    container is configured as a static website with CDN and the CDN URL is
    printed.
    """
    config = RawConfigParser()
    # FIXME: support running outside of the site root
    # FIXME: enforce mode 600!
    config.read([".simple-cloud-site.cfg"])

    source_dir = os.path.realpath(os.curdir)
    # TODO: allow provider configuration
    container_name = config.get("site", "container")
    logging.info('Publishing %s to %s', source_dir, container_name)

    driver, container = get_driver_instance(config, container_name)

    upload_queue = Queue()
    workers = [
        Thread(target=upload_worker, args=(i, upload_queue, config, container_name))
        for i in range(8)
    ]

    remote_objects = {i.name: i.hash for i in container.list_objects()}

    def _local_md5(path):
        # Hex MD5 of a local file, read in 8 KiB chunks
        # TODO: don't read the file twice:
        with open(path, "rb") as local_file:
            digest = md5()
            for chunk in iter(lambda: local_file.read(8192), b''):
                digest.update(chunk)
        return digest.hexdigest()

    for f in find_files(source_dir):
        target_path = f.replace(source_dir, '').lstrip("/")

        # TODO: load ignore list from site config
        if target_path.endswith(".scss"):
            continue

        # Skip files that are already uploaded with identical content
        if target_path in remote_objects and _local_md5(f) == remote_objects[target_path]:
            continue

        mime_type, encoding = mimetypes.guess_type(f)
        if not mime_type:
            mime_type = 'application/octet-stream'

        upload_queue.put({
            'object_name': target_path,
            'file_path': f,
            'extra': {'content_type': mime_type},
        })

    logging.info('Waiting for %d uploads to complete…', upload_queue.qsize())

    for worker in workers:
        worker.setDaemon(True)
        worker.start()

    upload_queue.join()

    logging.info('Configuring static site…')
    driver.ex_enable_static_website(container=container, index_file='index.html')
    driver.ex_set_error_page(container=container, file_name='error.html')
    driver.enable_container_cdn(container=container)

    print('CDN URL:', driver.get_container_cdn_url(container=container))
class ConnectionPool(object):
    """
    Simple connection-caching pool implementation.

    ConnectionPool provides the simplest possible connection pooling,
    lazily creating new connections if needed as `borrow_connection' is
    called.  Connections are re-added to the pool by `return_connection',
    unless doing so would exceed the maximum pool size.

    Example usage:
    >>> pool = ConnectionPool("localhost", 9160, "Keyspace1")
    >>> conn = pool.borrow_connection()
    >>> conn.execute(...)
    >>> pool.return_connection(conn)
    """

    def __init__(self, hostname, port=9160, keyspace=None, username=None,
                 password=None, decoder=None, max_conns=25, max_idle=5,
                 eviction_delay=10000):
        self.hostname = hostname
        self.port = port
        self.keyspace = keyspace
        self.username = username
        self.password = password
        self.decoder = decoder
        self.max_conns = max_conns
        self.max_idle = max_idle
        self.eviction_delay = eviction_delay

        # The pool starts out with one connection
        self.connections = Queue()
        self.connections.put(self.__create_connection())
        self.eviction = Eviction(self.connections,
                                 self.max_idle,
                                 self.eviction_delay)

    def __create_connection(self):
        """Open a new connection using the pool's settings."""
        return Connection(self.hostname,
                          port=self.port,
                          keyspace=self.keyspace,
                          username=self.username,
                          password=self.password,
                          decoder=self.decoder)

    def borrow_connection(self):
        """Return a pooled connection, or a newly created one when the pool is empty."""
        try:
            connection = self.connections.get(block=False)
        except Empty:
            connection = self.__create_connection()
        return connection

    def return_connection(self, connection):
        """Give ``connection`` back to the pool.

        The connection is closed instead of pooled when the pool already
        holds ``max_conns`` connections. Connections that are no longer open
        are dropped.
        """
        # ">=" rather than ">": with ">" a pool holding exactly max_conns
        # connections accepted one more and grew to max_conns + 1.
        if self.connections.qsize() >= self.max_conns:
            connection.close()
            return
        if not connection.is_open():
            return
        self.connections.put(connection)
def ana(request):
    '''
    Render the monthly booking calendar page ('home/ana.html').

    The month and year come from the ``month`` / ``year`` GET parameters;
    when one is missing or empty, the month/year of "now + 31 days" is used.
    One thread per entry of ``requestvenueId`` runs ``threadAna`` and puts a
    result on a queue; the results are merged into ``res`` and then laid out
    week by week in ``cal``.

    NOTE: the template context is ``locals()``, so every local variable name
    in this function is visible to the template. Renaming or removing a local
    changes what the template receives.

    res:
    [
        [0, 0, 0, 0, 0, 0, 0, 0], # day 1 court 4,5,6,7 18:00 / court 4,5,6,7 20:00
        [0, 0, 0, 0, 0, 0, 0, 0], # day 2 court 4,5,6,7 18:00 / court 4,5,6,7 20:00
        ...
    ]
    cal:
    [
        {
            "date": 1, # day 1
            "sticks": [0, 0, 0, 0, 0, 0, 0, 0] # just like res
            "colors": ["", "", "", "", "", "", "", ""] # color of each time slice which depends on the quantity of sticks
        },
        ...
    ]
    '''
    # Requested month/year, defaulting to the month 31 days from now.
    if 'month' in request.GET and request.GET['month']:
        requestMonth = int(request.GET['month'])
    else:
        requestMonth = (datetime.now() + timedelta(days=31)).month
    if 'year' in request.GET and request.GET['year']:
        requestYear = int(request.GET['year'])
    else:
        requestYear = (datetime.now() + timedelta(days=31)).year
    currentYear = datetime.now().year  ## for copyright year
    requestTime = datetime(requestYear, requestMonth, 1)
    # First and last day of the requested month as strings.
    requestDateS = datetime(requestTime.year, requestTime.month, 1).strftime("%Y-%m-%d")  ## yyyy-MM-dd
    requestDateE = datetime(
        requestTime.year, requestTime.month,
        calendar.monthrange(requestTime.year, requestTime.month)[1]).strftime(
            "%Y-%m-%d")  ## yyyy-MM-dd
    # Month list plus a parallel list marking the requested month "selected".
    monthList = [i for i in range(1, 13)]
    monthselect = [""] * 12
    monthselect[requestMonth - 1] = "selected"

    ## calendar info
    # monthrange() returns (weekday of the 1st, number of days in the month).
    weekdayS = calendar.monthrange(requestTime.year, requestTime.month)[0]
    days = calendar.monthrange(requestTime.year, requestTime.month)[1]
    # Number of calendar rows needed to show the whole month.
    weeks = math.ceil((weekdayS + days) / 7)

    # One row of 8 counters per day (see docstring for the column layout).
    res = [[0] * 8 for i in range(days)]
    isDrawn = True
    q = Queue()
    threads = []
    # NOTE(review): requestvenueId and threadAna are not defined in this
    # function; presumably module-level names - confirm.
    for court in requestvenueId:
        key = {
            'rentDateS': requestDateS,
            'rentDateE': requestDateE,
            'venueId': court
        }
        t = threading.Thread(target=threadAna, args=(q, days, key))
        t.start()
        threads.append(t)
    for thread in threads:
        thread.join()
    # All threads have finished, so qsize() is the number of queued results.
    for _ in range(q.qsize()):
        data = q.get()
        for i in range(days):
            # Each result fills two columns: court - 4 and court.
            # (presumably court is 4..7, per the docstring - confirm)
            res[i][data['court'] - 4] = data['res'][i][0]
            res[i][data['court']] = data['res'][i][1]
        # isDrawn stays True only if every result reports isDrawn.
        isDrawn &= data['isDrawn']

    ## calendar
    # weeks x 7 grid of placeholder cells; date 0 marks a cell outside the month.
    cal = [[{
        "date": 0,
        "sticks": [],
        "colors": [],
        "colColor": False
    } for _ in range(7)] for _ in range(weeks)]
    for i in range(1, days + 1):
        # Translate each counter into a colour name by threshold.
        color = res[i - 1].copy()
        for j in range(8):
            if color[j] == 0:
                color[j] = ""
            elif color[j] < 5:
                color[j] = "green"
            elif color[j] < 10:
                color[j] = "orange"
            else:
                color[j] = "red"
        # Day i lands at grid offset (i + weekdayS - 1): row = // 7, column = % 7.
        cal[(i + weekdayS - 1) // 7][(i + weekdayS - 1) % 7] = {
            "date": i,
            "sticks": res[i - 1],
            "colors": color,
            "colColor": (i + weekdayS - 1) % 7 in [0, 4]
        }
    # The whole local namespace is handed to the template.
    return render(request, 'home/ana.html', locals())
class Takeover(Module):
    """
    OneForAll multi-threaded subdomain takeover risk check module

    Example:
        python3 takeover.py --target www.example.com  --format csv run
        python3 takeover.py --target ./subdomains.txt --thread 10 run

    Note:
        Supported values for format are 'txt', 'rst', 'csv', 'tsv', 'json',
        'yaml', 'html', 'jira', 'xls', 'xlsx', 'dbf', 'latex', 'ods'
        When path is None (the default) the output path is generated from
        the OneForAll result directory

    :param any target:  a single subdomain, or the path of a file with one
                        subdomain per line (required)
    :param int thread:  number of threads (default 100)
    :param str format:  export format (default csv)
    :param str path:    export path (default None)
    """

    def __init__(self, target, thread=100, path=None, format='csv'):
        Module.__init__(self)
        self.subdomains = set()
        self.module = 'Check'
        self.source = 'Takeover'
        self.target = target
        self.thread = thread
        self.path = path
        self.format = format
        self.fingerprints = None
        self.subdomainq = Queue()
        self.cnames = list()
        self.results = Dataset()

    def save(self):
        """Export the collected results in the chosen format and save them."""
        logger.log('DEBUG', '正在保存检查结果')
        if self.format == 'txt':
            data = str(self.results)
        else:
            data = self.results.export(self.format)
        utils.save_data(self.path, data)

    def compare(self, subdomain, cname, responses):
        """Record the subdomain when a fingerprint text is served by both hosts.

        Fetches the subdomain and its CNAME over HTTP; if any entry of
        ``responses`` occurs in both response bodies, the pair is appended
        to the results.
        """
        domain_resp = self.get('http://' + subdomain, check=False)
        cname_resp = self.get('http://' + cname, check=False)
        if domain_resp is None or cname_resp is None:
            return

        for resp in responses:
            if resp in domain_resp.text and resp in cname_resp.text:
                logger.log('ALERT', f'{subdomain}存在子域接管风险')
                self.results.append([subdomain, cname])
                break

    def worker(self, subdomain):
        """Check one subdomain against every fingerprint matching its CNAME."""
        cname = get_cname(subdomain)
        if cname is None:
            return
        maindomain = get_maindomain(cname)
        for fingerprint in self.fingerprints:
            cnames = fingerprint.get('cname')
            if maindomain not in cnames:
                continue
            responses = fingerprint.get('response')
            self.compare(subdomain, cname, responses)

    def check(self):
        """Thread body: take subdomains off the queue until it is empty."""
        from queue import Empty

        while True:
            # get_nowait() instead of empty()+get(): another thread may take
            # the last item between the two calls, blocking get() forever.
            try:
                subdomain = self.subdomainq.get_nowait()
            except Empty:
                break  # queue fully consumed, let the thread exit
            try:
                self.worker(subdomain)
            except Exception as e:
                # One failing subdomain must not kill the worker thread.
                logger.log('DEBUG', f'{subdomain} check failed: {e!r}')
            finally:
                # Always acknowledge the item, otherwise subdomainq.join()
                # in run() would never return.
                self.subdomainq.task_done()

    def progress(self):
        """Thread body: show a progress bar until the queue has been emptied."""
        bar = tqdm()
        bar.total = len(self.subdomains)
        bar.desc = 'Progress'
        bar.ncols = 60
        while True:
            done = bar.total - self.subdomainq.qsize()
            bar.n = done
            # refresh() redraws with the absolute count set above; update()
            # would add 1 on top of it.
            bar.refresh()
            if done == bar.total:  # every queued subdomain has been taken
                break
            time.sleep(0.1)  # avoid spinning a CPU core while waiting

    def run(self):
        """Load the targets, run the checks in threads and save the results."""
        start = time.time()
        logger.log('INFOR', f'开始执行{self.source}模块')
        self.subdomains = utils.get_domains(self.target)
        self.format = utils.check_format(self.format, len(self.subdomains))
        timestamp = utils.get_timestamp()
        name = f'all_subdomain_{timestamp}'
        self.path = utils.check_path(self.path, name, self.format)
        if self.subdomains:
            logger.log('INFOR', f'正在检查子域接管风险')
            self.fingerprints = get_fingerprint()
            self.results.headers = ['subdomain', 'cname']
            # Fill the queue of subdomains to check
            for domain in self.subdomains:
                self.subdomainq.put(domain)
            # Checker threads
            for _ in range(self.thread):
                check_thread = Thread(target=self.check, daemon=True)
                check_thread.start()
            # Progress thread
            progress_thread = Thread(target=self.progress, daemon=True)
            progress_thread.start()

            self.subdomainq.join()
            self.save()
        else:
            logger.log('FATAL', f'获取域名失败')
        end = time.time()
        elapsed = round(end - start, 1)
        logger.log(
            'INFOR', f'{self.source}模块耗时{elapsed}秒'
            f'发现{len(self.results)}个子域存在接管风险')
        logger.log('DEBUG', f'结束执行{self.source}模块')
class QAPIWorker(object):
    """QAPI Worker is one IoticAgent instance
    """

    def __init__(self, details, managerStop, keepFeeddata=DATA_KEEP, keepControlreq=DATA_KEEP,
                 keepUnsolicited=DATA_KEEP, sleepOnIdle=SLEEP_ON_IDLE):
        """Store the agent settings; nothing is started until start().

        Args:
            details: Mapping of agent settings. 'host', 'epid', 'passwd' and
                'token' are read when the worker thread runs; 'queue_size',
                'throttle', 'vhost', 'sslca', 'prefix' and 'authtokens' are
                optional.
            managerStop: Event-like object; the worker thread exits once it
                is set.
            keepFeeddata, keepControlreq, keepUnsolicited: Queue limits for
                the three callback queues; 0 (or an unparsable value)
                disables queuing for that kind.
            sleepOnIdle: Idle seconds after which the core client is stopped.
        """
        #
        self.__details = details
        self.__stop = Event()
        self.__managerStop = managerStop
        self.__keepFeeddata = self.__as_int(keepFeeddata, 'keepFeeddata')
        self.__keepControlreq = self.__as_int(keepControlreq, 'keepControlreq')
        self.__keepUnsolicited = self.__as_int(keepUnsolicited, 'keepUnsolicited')
        #
        self.__qFeeddata = Queue()
        self.__qControlreq = Queue()
        self.__qUnsolicited = Queue()
        #
        self.__thread = None
        self.__qc = None  # IoticAgent.Core.Client instance
        #
        self.__sleep_on_idle = sleepOnIdle
        self.__qc_last = 0  # Last time qc was used for a request
        self.__qc_running = False  # QC is sleeping?  (started (True) or stopped (False))
        #
        self.__polltime = 5
        self.__dead_max = 6  # How many times to try to start a dead worker?
        self.__dead_sleep = 5  # How long to sleep between worker.start attempts?
        #
        # IoticAgent Core throttle settings
        self.__queue_size = 128
        if 'queue_size' in details:
            self.__queue_size = details['queue_size']
        self.__throttle_conf = '540/30,1890/300'
        if 'throttle' in details:
            self.__throttle_conf = details['throttle']
        #
        self.__vhost = 'container1'
        if 'vhost' in details:
            self.__vhost = details['vhost']
        #
        self.__sslca = None
        if 'sslca' in details:
            self.__sslca = details['sslca']
        #
        self.__prefix = ''
        if 'prefix' in details:
            self.__prefix = details['prefix']

    @staticmethod
    def __as_int(value, name):
        """Return int(value), or 0 (with a warning) when it cannot be converted."""
        try:
            return int(value)
        except (TypeError, ValueError, OverflowError):
            logger.warning("QAPIWorker failed to int %s '%s'", name, value)
            return 0

    def check_details(self, details):
        """Return False if any key shared with the stored details differs."""
        for ek in details:
            if ek in self.__details and self.__details[ek] != details[ek]:
                return False
        return True

    def check_authtoken(self, authToken):
        """Return True if authToken is accepted for this worker.

        When 'authtokens' is configured it is a newline-separated list and
        only those tokens match; otherwise the agent 'token' is compared.
        """
        if 'authtokens' in self.__details:
            tokens = self.__details['authtokens'].strip().split("\n")
            return authToken in tokens
        # todo: Allow Agent token as API Key ??
        return authToken == self.__details['token']

    def start(self):
        """Start the worker thread."""
        self.__thread = Thread(target=self.__run)
        self.__thread.start()

    def stop(self):
        """Signal the worker thread to stop and wait for it to finish."""
        self.__stop.set()
        # stop() before start() is a no-op rather than an AttributeError.
        if self.__thread is not None:
            self.__thread.join()

    def __wake_agent(self):
        """Start the core client if it is stopped and record the time of use."""
        if not self.__qc_running:
            logger.info("QAPIWorker %s Waking", self.__details['epid'])
            self.__qc.start()
            self.__qc_running = True
        self.__qc_last = monotonic()

    @property
    def default_lang(self):
        return self.__qc.default_lang

    # Every request_* method below wakes the core client if necessary and
    # forwards its arguments to the core client method of the same name.

    def request_entity_create(self, lid, tepid=None):
        self.__wake_agent()
        return self.__qc.request_entity_create(lid, epId=tepid)

    def request_entity_rename(self, lid, newlid):
        self.__wake_agent()
        return self.__qc.request_entity_rename(lid, newlid)

    def request_entity_reassign(self, lid, nepid=None):
        self.__wake_agent()
        return self.__qc.request_entity_reassign(lid, nepid)

    def request_entity_delete(self, lid):
        self.__wake_agent()
        return self.__qc.request_entity_delete(lid)

    def request_entity_list(self, limit=500, offset=0):
        self.__wake_agent()
        return self.__qc.request_entity_list(limit=limit, offset=offset)

    def request_entity_list_all(self, limit=500, offset=0):
        self.__wake_agent()
        return self.__qc.request_entity_list_all(limit=limit, offset=offset)

    def request_entity_meta_get(self, lid, fmt="n3"):
        self.__wake_agent()
        return self.__qc.request_entity_meta_get(lid, fmt=fmt)

    def request_entity_meta_set(self, lid, meta, fmt="n3"):
        self.__wake_agent()
        return self.__qc.request_entity_meta_set(lid, meta, fmt=fmt)

    def request_entity_meta_setpublic(self, lid, public=True):
        self.__wake_agent()
        return self.__qc.request_entity_meta_setpublic(lid, public=public)

    def request_entity_tag_update(self, lid, tags, delete=False):
        self.__wake_agent()
        return self.__qc.request_entity_tag_update(lid, tags, delete=delete)

    def request_entity_tag_list(self, lid, limit=100, offset=0):
        self.__wake_agent()
        return self.__qc.request_entity_tag_list(lid, limit=limit, offset=offset)

    def request_point_create(self, foc, lid, pid, control_cb=None, save_recent=0):
        self.__wake_agent()
        return self.__qc.request_point_create(foc, lid, pid, control_cb=control_cb, save_recent=save_recent)

    def request_point_rename(self, foc, lid, pid, newpid):
        self.__wake_agent()
        return self.__qc.request_point_rename(foc, lid, pid, newpid)

    def request_point_confirm_tell(self, foc, lid, pid, success=True, requestId=None):
        self.__wake_agent()
        return self.__qc.request_point_confirm_tell(foc, lid, pid, success=success, requestId=requestId)

    def request_point_delete(self, foc, lid, pid):
        self.__wake_agent()
        return self.__qc.request_point_delete(foc, lid, pid)

    def request_point_list(self, foc, lid, limit=500, offset=0):
        self.__wake_agent()
        return self.__qc.request_point_list(foc, lid, limit=limit, offset=offset)

    def request_point_list_detailed(self, foc, lid, pid):
        self.__wake_agent()
        return self.__qc.request_point_list_detailed(foc, lid, pid)

    def request_point_meta_get(self, foc, lid, pid, fmt="n3"):
        self.__wake_agent()
        return self.__qc.request_point_meta_get(foc, lid, pid, fmt=fmt)

    def request_point_meta_set(self, foc, lid, pid, meta, fmt="n3"):
        self.__wake_agent()
        return self.__qc.request_point_meta_set(foc, lid, pid, meta, fmt=fmt)

    def request_point_value_create(self, lid, pid, foc, label, vtype, lang=None, comment=None, unit=None):
        self.__wake_agent()
        return self.__qc.request_point_value_create(lid, pid, foc, label, vtype, lang=lang, comment=comment,
                                                    unit=unit)

    def request_point_value_delete(self, lid, pid, foc, label=None):
        self.__wake_agent()
        return self.__qc.request_point_value_delete(lid, pid, foc, label=label)

    def request_point_value_list(self, lid, pid, foc, limit=500, offset=0):
        self.__wake_agent()
        return self.__qc.request_point_value_list(lid, pid, foc, limit=limit, offset=offset)

    def request_point_tag_update(self, foc, lid, pid, tags, delete=False):
        self.__wake_agent()
        return self.__qc.request_point_tag_update(foc, lid, pid, tags, delete=delete)

    def request_point_tag_list(self, foc, lid, pid, limit=500, offset=0):
        self.__wake_agent()
        return self.__qc.request_point_tag_list(foc, lid, pid, limit=limit, offset=offset)

    def request_sub_create(self, lid, foc, gpid, callback=None):
        self.__wake_agent()
        return self.__qc.request_sub_create(lid, foc, gpid, callback=callback)

    def request_sub_create_local(self, slid, foc, lid, pid, callback=None):
        self.__wake_agent()
        return self.__qc.request_sub_create_local(slid, foc, lid, pid, callback=callback)

    def request_point_share(self, lid, pid, data, mime=None):
        self.__wake_agent()
        return self.__qc.request_point_share(lid, pid, data, mime=mime)

    def request_sub_ask(self, sub_id, data, mime=None):
        self.__wake_agent()
        return self.__qc.request_sub_ask(sub_id, data, mime=mime)

    def request_sub_tell(self, sub_id, data, timeout, mime=None):
        self.__wake_agent()
        return self.__qc.request_sub_tell(sub_id, data, timeout, mime=mime)

    def request_sub_delete(self, sub_id):
        self.__wake_agent()
        return self.__qc.request_sub_delete(sub_id)

    def request_sub_list(self, lid, limit=500, offset=0):
        self.__wake_agent()
        return self.__qc.request_sub_list(lid, limit=limit, offset=offset)

    def request_sub_recent(self, sub_id, count=None):
        self.__wake_agent()
        return self.__qc.request_sub_recent(sub_id, count=count)

    def request_search(self, text=None, lang=None, location=None, unit=None, limit=100, offset=0, type_='full',
                       scope=IoticAgentCore.Const.SearchScope.PUBLIC):
        self.__wake_agent()
        return self.__qc.request_search(text=text, lang=lang, location=location, unit=unit, limit=limit,
                                        offset=offset, type_=type_, scope=scope)

    def request_describe(self, guid, scope=IoticAgentCore.Const.DescribeScope.AUTO):
        self.__wake_agent()
        return self.__qc.request_describe(guid, scope=scope)

    @staticmethod
    def __enqueue(q, keep, data):
        """Queue data on q, first discarding the oldest items while q holds more than keep.

        Nothing is queued when keep is 0.
        """
        if keep == 0:
            return
        while q.qsize() > keep:
            q.get()  # throw away element
        q.put(data)

    @staticmethod
    def __drain(q):
        """Return everything currently queued on q as a list, oldest first."""
        ret = []
        while q.qsize() > 0:
            ret.append(q.get())
        return ret

    def __cb_feeddata(self, data):
        self.__enqueue(self.__qFeeddata, self.__keepFeeddata, data)

    def get_feeddata(self):
        """Return and clear the queued feed data."""
        return self.__drain(self.__qFeeddata)

    def __cb_controlreq(self, data):
        self.__enqueue(self.__qControlreq, self.__keepControlreq, data)

    def get_controlreq(self):
        """Return and clear the queued control requests."""
        return self.__drain(self.__qControlreq)

    def __cb_unsolicited(self, data):
        self.__enqueue(self.__qUnsolicited, self.__keepUnsolicited, data)

    def get_unsolicited(self):
        """Return and clear the queued unsolicited messages."""
        return self.__drain(self.__qUnsolicited)

    def __run(self):
        """Thread body: create and start the core client, then idle-manage it.

        Start-up is retried up to __dead_max times. Afterwards the loop polls
        every __polltime seconds and stops the client once it has been unused
        for longer than __sleep_on_idle seconds; a later request wakes it.
        """
        #
        self.__qc = IoticAgentCore.Client(self.__details['host'],
                                          self.__vhost,
                                          self.__details['epid'],
                                          self.__details['passwd'],
                                          self.__details['token'],
                                          prefix=self.__prefix,
                                          sslca=self.__sslca,
                                          send_queue_size=self.__queue_size,
                                          throttle_conf=self.__throttle_conf)
        # network_retry_timeout=10,  # todo: override in config ?
        #
        # Keep trying to start the worker
        dead_count = 0
        done = False
        while not done and not self.__stop.is_set() and\
                not self.__managerStop.is_set() and\
                dead_count < self.__dead_max:
            done = True
            try:
                self.__wake_agent()
            except Exception:
                logger.error("Worker %s FAILED TO START sleep(%i)...", self.__details['epid'], self.__dead_sleep)
                done = False
                dead_count += 1
                sleep(self.__dead_sleep)
        if dead_count >= self.__dead_max:
            return
        #
        self.__qc.register_callback_feeddata(self.__cb_feeddata)
        self.__qc.register_callback_controlreq(self.__cb_controlreq)
        self.__qc.register_callback_reassigned(self.__cb_unsolicited)
        self.__qc.register_callback_subscription(self.__cb_unsolicited)
        #
        while not self.__stop.is_set() and not self.__managerStop.is_set():
            logger.debug("QAPIWorker %s still running", self.__details['epid'])
            self.__stop.wait(self.__polltime)
            #
            if self.__qc_running and (monotonic() - self.__qc_last > self.__sleep_on_idle):
                logger.info("QAPIWorker %s Sleeping", self.__details['epid'])
                self.__qc.stop()
                self.__qc_running = False
        # Clean-up (best effort: the client may already be stopped)
        try:
            self.__qc.stop()
        except Exception:
            pass
def crawl(seeds, username, password, site_name, config, outf=None, dout=None, ngout=None):
    """Crawl CDP/LLDP neighbors to build a topology.

    Starting from the seed devices, each pass scrapes every queued device in
    its own thread (``gather_nd``), joins the threads, and then queues the
    newly discovered, unvisited IOS/NX-OS neighbors for the next pass. When
    no devices remain the collected data is handed to
    ``output.output_files``.

    Args:
        seeds: Iterable of seed device identifiers (also used as their ipv4).
        username, password: Credentials passed to ``gather_nd``.
        site_name: Passed through to ``output.output_files``.
        config: Mapping with a 'main' section providing 'seed_os', 'quiet',
            'log_level', 'max_crawl' and 'thread_count'.
        outf, dout, ngout: Output targets passed to ``output.output_files``.
    """
    # Queue for devices to scrape next
    q = Queue()

    # Queue for neighbor output from threads
    out_q = Queue()

    # Visited devices, for loop detection (set: O(1) membership tests)
    visited = set()

    # All Neighbor Entries
    neighbors = list()

    # Device entries for connection details (ipv4, os etc)
    devices = dict()

    # Thread tracking
    qtrack = dict()

    # Threads for which a join was already attempted
    joined = set()

    # Distance tracking
    distances = dict()

    # Counters
    crawl_count = 0
    iteration_count = 0

    # Queue up seed devices
    for s in seeds:
        q.put(s)
        devices[s] = dict()
        devices[s]['remote_device_id'] = s
        devices[s]['ipv4'] = s
        devices[s]['os'] = config['main']['seed_os']
        devices[s]['platform'] = 'Unknown'
        devices[s]['logged_in'] = True
        distances[s] = 0

    # Outer Queue, starts inner queue and then adds all unvisited neighbors to queue when
    # inner queue is empty. Each iteration of outer queue visits all next level neighbors
    # at once inside inner queue via threads.
    while not q.empty():
        iteration_count += 1
        cqsize = q.qsize()

        # Progress bar on warning level or above, from the second pass on
        show_progress = (not config['main']['quiet']
                         and int(config['main']['log_level']) >= logging.WARNING
                         and iteration_count > 1)
        if show_progress:
            pbar = tqdm(total=cqsize, unit='dev')
            pbar.set_description('Iteration %s' % str(iteration_count))

        # Launch threads on everything in queue to scrape
        while not q.empty():
            current = q.get()

            if show_progress:
                pbar.update(1)
                print('\r', end='')

            if crawl_count > int(config['main']['max_crawl']):
                logger.warning('Max Devices allowed already crawled')

            # Only scrape unvisited devices
            elif current not in visited:
                crawl_count += 1
                visited.add(current)

                while threading.active_count() > int(config['main']['thread_count']):
                    qsize = q.qsize()
                    logger.debug('Waiting for free thread - Q Size: %s', str(qsize))
                    sleep(1)

                # Throttle connections
                sleep(0.1)

                logger.info('Processing %s', current)

                # Start thread to scrape devices
                nd_thread = threading.Thread(target=gather_nd,
                                             kwargs={"device": devices[current],
                                                     "username": username,
                                                     "password": password,
                                                     "out_q": out_q,
                                                     "qtrack": qtrack})
                nd_thread.start()

        # Join all threads from last iteration and warn if problems joining threads
        logger.info('Joining all active threads')
        main_thread = threading.current_thread()
        # The first join waits 15 seconds; every later join waits only 1.
        wait_timer = 15
        for some_thread in threading.enumerate():
            if some_thread != main_thread:
                tid = str(some_thread.ident)
                if tid in qtrack:
                    tid = qtrack[tid]
                if tid not in joined:
                    joined.add(tid)
                    logger.debug('Joining Thread: %s', tid)
                    some_thread.join(timeout=wait_timer)
                    wait_timer = 1
                else:
                    logger.info('Thread running long time, ignoring: %s: %s', tid, str(some_thread))

        # Process output queue of neighbor data and look for new neighbors to visit
        logger.info('Processing output queue')
        while not out_q.empty():
            nd = out_q.get()

            # Gather distance info
            for n in nd:
                if n['local_device_id'] not in distances:
                    distances[n['local_device_id']] = 100
                if n['remote_device_id'] in distances:
                    if distances[n['local_device_id']] > (distances[n['remote_device_id']] + 1):
                        distances[n['local_device_id']] = distances[n['remote_device_id']] + 1
                        logger.info('Found new distances on %s: %s', n['local_device_id'],
                                    str(distances[n['remote_device_id']] + 1))

            # Save all neighbor data
            for n in nd:
                n['distance'] = distances[n['local_device_id']]
                neighbors.append(n)
                rname = n['remote_device_id']

                # Save device to devices
                if rname not in devices:
                    devices[rname] = n
                # Update unknown devices, restore logged_in variable
                elif devices[rname]['platform'] == 'Unknown':
                    logged_in = False
                    if 'logged_in' in devices[rname]:
                        logged_in = devices[rname]['logged_in']
                    devices[rname] = n
                    devices[rname]['logged_in'] = logged_in

                # Local device always was logged in to
                devices[n['local_device_id']]['logged_in'] = True

                logger.info('Processing Out_q entry %s on %s', rname, n['local_device_id'])

                # New Neighbor that has not been scraped, only scrape IOS/NXOS for now
                if rname not in visited:
                    if n['os'] in ('cisco_nxos', 'cisco_ios'):
                        # q.queue is the Queue's underlying container;
                        # checking it avoids queuing a device twice.
                        if rname not in q.queue:
                            q.put(rname)
                    else:
                        visited.add(rname)
                else:
                    logger.debug('Already visited %s', rname)

    # Count Neighbors
    logger.info('Total neighbors: %s', str(len(neighbors)))

    output.output_files(outf, ngout, dout, neighbors, devices, distances, site_name, config)
# Output locations: image directory and the results file.
imgs_folder = 'imgs'
dataset = 'controls_popups_dataset.jsonl'

# Truncate (or create) the results file so the run starts empty.
with open(dataset, 'w'):
    pass

# Make sure the image folder exists.
if not os.path.exists(imgs_folder):
    os.makedirs(imgs_folder)

# Remove every .png already present in the image folder.
old_files = [f for f in os.listdir(imgs_folder) if f.endswith(".png")]
for file in old_files:
    stale_path = os.path.join(imgs_folder, file)
    os.remove(stale_path)

# Start the daemon threads, each running its own extractor on the shared queue.
num_threads = 8
for _ in range(num_threads):
    extractor = ControlsExtractor(imgs_folder, dataset, queue)
    t = threading.Thread(target=extractor.start)
    t.daemon = True
    t.start()

# Report the queue size once a minute until nothing is left to hand out...
while True:
    if queue.empty():
        break
    print('queue size: ', queue.qsize())
    time.sleep(60)

# ...then wait until every item taken from the queue has been marked done.
queue.join()
class Job51Engine(AbstractEngine):
    """51job crawler."""

    def __init__(self, **kwargs):
        """Store the crawl settings and open the database connection.

        Expects the keyword arguments 'name', 'url' (a template with
        ``{keywords}`` and ``{pagenum}`` fields), 'keywords' and 'pagenum'.
        """
        self.name = kwargs['name']
        self.rawurl = kwargs['url']
        self.keywords = "%2520" if kwargs['keywords'] == "" else kwargs['keywords']
        self.pagenum = kwargs['pagenum']
        self.url = self.rawurl.format(keywords=self.keywords, pagenum=self.pagenum)
        self.hrefQueue = Queue(maxsize=5000)
        self.jobinfothreads = []
        self.joblisthreads = []
        self.totalpage = 0
        self.connection = connect(
            host='localhost',
            user='******',
            password='******',
            db='jobinfo',
            charset='utf8'
        )
        self.cursor = self.connection.cursor()
        # Kept unchanged for backward compatibility; no longer used for
        # inserts because formatting scraped text into SQL is injectable.
        self.templatesql = "INSERT INTO jobinfo (jid ,jurl ,jpost ,jsalary ,jcompany ,jsummary ,jpostinfo ,jconcatinfo ,jsource) VALUES ( '{jid}','{jurl}','{jpost}','{jsalary}','{jcompany}','{jsummary}', '{jpostinfo}','{jconcatinfo}','{jsource}' )"
        # Parameterized form of the same statement; the driver escapes values.
        self._insertsql = (
            "INSERT INTO jobinfo (jid ,jurl ,jpost ,jsalary ,jcompany ,jsummary ,"
            "jpostinfo ,jconcatinfo ,jsource) "
            "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)"
        )

    def execute(self):
        """Collect job links from every list page, then store each job's details."""
        print("51job 爬虫启动...")
        indexpage = self._gethtmlpage(url=self.url, encoding='gbk')
        # Read the total number of list pages
        self.totalpage = indexpage.select("#hidTotalPage")[0].attrs['value']
        # self.totalpage = 1
        self.__pagenum = self.genpagenum(int(self.totalpage))
        self._inithreads(self.joblisthreads, self._getjoblist)
        self._starthreads(self.joblisthreads)
        self._jointhreads(self.joblisthreads)
        print(self.hrefQueue.qsize())
        self._inithreads(self.jobinfothreads, self._getJobInfo)
        self._starthreads(self.jobinfothreads)
        self._jointhreads(self.jobinfothreads)
        print("51job 爬虫结束...")

    def _gethtmlpage(self, url, encoding='utf-8'):
        """Fetch url and return it parsed as a BeautifulSoup object.

        An empty document is returned when the response status is not 200.
        """
        response = requests.get(url)
        if 200 != response.status_code:
            # The original built this empty document but forgot to return it.
            return BeautifulSoup("", 'lxml')
        response.encoding = encoding
        return BeautifulSoup(response.text, 'lxml')

    def _getJobInfo(self):
        """Thread body: fetch each queued job page and insert it into the database."""
        from queue import Empty

        while True:
            # get_nowait() instead of empty()+get(): another thread may take
            # the last URL between the two calls and block this one forever.
            try:
                jobinfourl = self.hrefQueue.get_nowait()
            except Empty:
                break
            infopage = self._gethtmlpage(jobinfourl, 'gbk')
            try:
                company_center = infopage.select("div.tCompany_center.clearfix")[0]
                header = company_center.select("div.tHeader.tHjob")[0]
                company_main = company_center.select("div.tCompany_main")[0]
                tBorderTop_boxs = company_main.select("div.tBorderTop_box")
                # Position details
                postInfo = tBorderTop_boxs[0]
                # Contact details
                contactInfo = tBorderTop_boxs[1]
            except IndexError:
                # Page without the expected layout: skip it and keep the
                # thread working on the remaining URLs.
                continue
            params = (
                my_uuid(),
                jobinfourl,
                self._getjpost(header),
                self._getjsalary(header),
                self._getjcompany(header),
                self._getjsummary(header),
                str(self._pinjiezhiweixinxi(postInfo.select("div.bmsg.job_msg.inbox > p"))),
                self._getconcatinfo(contactInfo),
                'http://www.51job.com',
            )
            # The cursor/connection pair is shared by all threads; the lock
            # is released even if the insert raises.
            with counter_lock2:
                self.cursor.execute(self._insertsql, params)
                self.connection.commit()

    def _getjsummary(self, header):
        """Return the summary line's title attribute, or "" if absent."""
        try:
            return str(header.select("div.cn p.msg.ltype")[0].attrs['title'])
        except IndexError:
            return ""

    def _getjcompany(self, header):
        """Return the company name, or "" if absent."""
        try:
            return str(header.select("div.cn p.cname a.catn")[0].attrs['title'])
        except IndexError:
            return ""

    def _getjsalary(self, header):
        """Return the salary text, or "" if absent."""
        try:
            return str(header.select("div.cn strong")[0].text)
        except IndexError:
            return ""

    def _getjpost(self, header):
        """Return the position title, or "" if absent."""
        try:
            return str(header.select("div.cn h1")[0].attrs['title'])
        except IndexError:
            return ""

    def _getconcatinfo(self, element):
        """Return the contact text, or "" if absent."""
        try:
            return str(element.select("div.bmsg.inbox > p.fp")[0].text)
        except IndexError:
            return ""

    def _pinjiezhiweixinxi(self, resultset):
        """Join the text of each element, one per line, with single quotes removed."""
        return "".join("\n" + str(temp.text).replace("'", "") for temp in resultset)

    def _getjoblist(self):
        """Thread body: fetch list pages until the shared page generator is exhausted."""
        while True:
            # Only next() is guarded, so StopIteration always means the page
            # generator is exhausted.
            try:
                pagenum = next(self.__pagenum)
            except StopIteration:
                break
            listpage = self._gethtmlpage(
                url=self.rawurl.format(
                    keywords=self.keywords,
                    pagenum=pagenum
                ),
                encoding='gbk'
            )
            self._parserListpage(listpage)

    def _parserListpage(self, listpage):
        """Queue the href of every job link found on a list page."""
        table = listpage.select("div#resultList.dw_table div.el p.t1 span a")
        for item in table:
            self.hrefQueue.put(item.attrs['href'])

    def _inithreads(self, threadlist, target, threadsize=10):
        """Append threadsize not-yet-started threads running target to threadlist."""
        for i in range(threadsize):
            work = Thread(target=target, name=('job thread %s' % i))
            threadlist.append(work)

    def _starthreads(self, threads):
        """Start every thread in threads."""
        for work in threads:
            work.start()

    def _jointhreads(self, threads):
        """Wait for every thread in threads to finish."""
        for work in threads:
            work.join()
data_rec.put(data) #print("received"+str(data)) #message=input("->") s.close() #if __name__==('__main__'): # Main() def threader(): while True: Main() t = threading.Thread(target=threader) t.daemon = True t.start() ####################################################### #copy the line above to the top of ur code #the line below is ur code # #it is a example here while True: time.sleep(1) #receive data routine if data_rec.qsize() > 0: print(data_rec.get())
class MainWindow(QWidget):
    """Main window of the file-search helper.

    The user picks a directory, a pattern and a search mode.  A background
    thread walks the directory and pushes matches and errors onto two
    bounded queues; two more threads pop those queues and append the
    entries to the result list and the error browser.

    NOTE(review): the background threads call widget methods (``setText``,
    ``addItems``, ``setTabText``) directly.  Qt expects widgets to be
    touched from the GUI thread only, so these updates should eventually be
    routed through signals; that redesign is not attempted here.
    """

    # Extensions opened with the system handler instead of the editor.
    _IMAGE_SUFFIXES = ('.jpg', '.png', '.jpeg', '.gif')
    # File types scanned by the content search.  The regular-expression mode
    # has always covered fewer types than the plain mode; kept as is.
    _REG_CONTENT_SUFFIXES = ('.h', '.cpp', '.cs')
    _PLAIN_CONTENT_SUFFIXES = ('.h', '.cpp', '.cs', '.txt', '.py')
    # Largest number of results moved to the list widget per refresh; adding
    # them in batches keeps the list from flickering.
    _RESULT_BATCH = 10
    # Pause between two polls of a display thread, in seconds.
    _POLL_INTERVAL = 0.05

    _STYLE_SHEET = (
        '#fram{'
        'border-image: url(Images/bg);'
        '}'
        '#lab_title{'
        'color: white;'
        'font-size: 18pt;'
        '}'
        '#open_tool{'
        'color: black;'
        '}'
        '#mode_fram{'
        'background: rgba(0, 0, 0, 40);'
        '}'
        '#ln_open_tool, #ln_path{'
        'border-top-left-radius: 2px;'
        'border-bottom-left-radius: 2px;'
        '}'
        '#ln_pattern{'
        'border-radius: 2px;'
        '}'
        '#state{'
        'background: rgba(0, 0, 0, 40);'
        'border-radius: 2px;'
        'padding: 1px;'
        'color: rgb(240, 240, 240);'
        '}'
        'QTabBar::tab {'
        'border: 0;'
        'width: 90px;'
        'height: 20px;'
        'margin: 0 2px 0 0;'  # top right bottom left
        # Fixed: the colour used to read "rgb(200, 255, 255;)".
        'color: rgb(200, 255, 255);'
        '}'
        'QTabBar::tab:selected{'
        'background: rgba(25, 0, 0, 40);'
        'border-left: 1px solid rgba(255, 255, 255, 200);'
        'border-top: 1px solid rgba(255, 255, 255, 200);'
        'border-right: 1px solid rgba(255, 255, 255, 200);'
        '}'
        # Fixed: "pane" is a sub-control and needs the "::" separator.
        'QTabWidget::pane {'
        'border: 1px solid rgba(255, 255, 255, 200);'
        'background: rgba(0, 0, 0, 80);'
        '}'
        '#browser_result, #browser_error{'
        'background: rgba(0, 0, 0, 0);'
        'border: 0;'
        '}'
        'QLineEdit{'
        'background: rgba(0, 0, 0, 40);'
        'border: 1px solid rgba(220, 220, 220, 200);'
        'color: white;'
        'height: 20px;'
        '}'
        'QPushButton{'
        'background: rgba(0, 0, 0, 100);'
        'border-radius: 5px;'
        'height: 20px;'
        'color: white;'
        '}'
        'QPushButton::hover{'
        'background: rgba(0, 0, 0, 150);'
        '}'
        'QToolButton{'
        'background: rgba(0, 0, 0, 100);'
        'color: white;'
        'border-top-right-radius: 2px;'
        'border-bottom-right-radius: 2px;'
        '}'
        'QToolButton::hover{'
        'background: rgba(0, 0, 0, 150);'
        '}')

    def __init__(self, parent=None):
        """Create the widgets, lay them out, style them and wire the signals.

        Args:
            parent: optional parent widget, forwarded to ``QWidget``.
        """
        super(MainWindow, self).__init__(parent)
        self.resize(900, 700)

        # Short mode name -> value accepted as ``mode`` by the search methods.
        self.__search_mode = {
            'fuzzy': 'fuzzy_search',
            'precise': 'precise_search',
            'reg': 'reg_search'
        }
        # Drag offset stored by mousePressEvent; None until a press is seen.
        self.offset = None

        self._create_widgets()
        self._create_layouts()
        self._assign_object_names()
        self.setStyleSheet(self._STYLE_SHEET)

        self.__ln_file_name.setFocus()
        self.__pbn_search.setShortcut(Qt.Key_Return)
        self._connect_signals()

        # Queues shared between the search thread and the display threads.
        queue_size = 10000
        self.__queue_result = Queue(queue_size)
        self.__queue_error = Queue(queue_size)
        # True while a search is in progress.
        self.__searching = False
        # Set to True to make every background thread return early.
        self.__thread_killer = False

    def _create_widgets(self):
        """Instantiate every widget and button group used by the window."""
        self.__widget_frame = QLabel()

        # window title
        self.__lab_title_fram = QLabel()
        self.__lab_title = QLabel('搜索辅助工具')
        self.__lab_title.setAlignment(Qt.AlignCenter)
        self.__lab_open_tool = QLabel('打开文件方式')
        self.__ln_open_tool = QLineEdit()
        self.__pbn_open_tool = QToolButton()
        self.__pbn_open_tool.setText('选择...')
        self.__ln_open_tool.setFixedHeight(20)
        self.__ln_open_tool.setFixedWidth(150)
        self.__pbn_open_tool.setFixedSize(48, 20)
        self.__lab_title_fram.setFixedHeight(50)

        # search mode
        self.__lab_mode_fram = QLabel()
        self.__rbn_fuzzy = QRadioButton('模糊搜索')
        self.__rbn_precise = QRadioButton('精确搜索')
        self.__rbn_reg = QRadioButton('正则表达式搜索')
        self.__rbn_fuzzy.setChecked(True)
        self.__lab_mode_fram.setFixedHeight(22)

        # search pattern
        self.__lab_pattern_fram = QLabel()
        self.__ln_file_name = QLineEdit()
        self.__ln_file_name.setPlaceholderText('请输入搜索条件或正则表达式......')
        self.__rbn_reg_Iyes = QRadioButton('区分大小写')
        self.__rbn_reg_Ino = QRadioButton('不区分大小写')
        self.__lab_pattern_fram.setFixedHeight(20)

        # search path
        self.__lab_path_fram = QLabel()
        self.__ln_file_path = QLineEdit()
        self.__ln_file_path.setPlaceholderText('请选择或输入路径......')
        self.__pbn_file_path = QToolButton()
        self.__pbn_file_path.setText('浏览...')
        self.__rbn_search_file = QRadioButton('检索文件名')
        self.__rbn_search_content = QRadioButton('检索文件内容')
        self.__pbn_file_path.setFixedSize(48, 20)
        self.__lab_path_fram.setFixedHeight(20)

        # search state
        self.__lab_state_fram = QLabel()
        self.__lab_state = QLabel('状态:暂无搜索结果!')
        self.__pbn_search = QPushButton('开始')
        self.__pbn_stop = QPushButton('停止')
        self.__pbn_search.setFixedWidth(89)
        self.__pbn_stop.setFixedWidth(89)
        self.__lab_state_fram.setFixedHeight(35)

        # search result
        self.__tabView = QTabWidget()
        self.__browser_result = QListWidget()
        self.__browser_error = QTextBrowser()
        self.__tabView.addTab(self.__browser_result, '匹配结果')
        self.__tabView.addTab(self.__browser_error, '错误结果')

        # radio-button groups
        self.__btn_group_type = QButtonGroup()
        self.__btn_group_type.addButton(self.__rbn_search_file)
        self.__btn_group_type.addButton(self.__rbn_search_content)
        self.__rbn_search_file.setChecked(True)

        self.__btn_group_re_I = QButtonGroup()
        self.__btn_group_re_I.addButton(self.__rbn_reg_Iyes)
        self.__btn_group_re_I.addButton(self.__rbn_reg_Ino)
        self.__rbn_reg_Iyes.setChecked(True)

    def _create_layouts(self):
        """Arrange the widgets created by ``_create_widgets``."""
        # open tool
        self.__layout_tool_choose = QHBoxLayout()
        self.__layout_tool_choose.addWidget(self.__ln_open_tool)
        self.__layout_tool_choose.addWidget(self.__pbn_open_tool)
        self.__layout_tool_choose.setSpacing(0)
        self.__layout_tool_choose.setContentsMargins(0, 0, 0, 0)

        self.__layout_open_tool = QHBoxLayout()
        self.__layout_open_tool.addWidget(self.__lab_open_tool)
        self.__layout_open_tool.addLayout(self.__layout_tool_choose)
        self.__layout_open_tool.setSpacing(2)

        # window title
        self.__layout_title = QHBoxLayout()
        self.__layout_title.addStretch(5)
        self.__layout_title.addWidget(self.__lab_title)
        self.__layout_title.addStretch(1)
        self.__layout_title.addLayout(self.__layout_open_tool)
        self.__layout_title.setContentsMargins(0, 0, 0, 20)
        self.__lab_title_fram.setLayout(self.__layout_title)

        # search mode
        self.__layout_search_mode = QHBoxLayout()
        self.__layout_search_mode.addWidget(self.__rbn_fuzzy)
        self.__layout_search_mode.addStretch()
        self.__layout_search_mode.addWidget(self.__rbn_precise)
        self.__layout_search_mode.addStretch()
        self.__layout_search_mode.addWidget(self.__rbn_reg)
        self.__layout_search_mode.setContentsMargins(60, 0, 60, 0)
        self.__lab_mode_fram.setLayout(self.__layout_search_mode)

        # search pattern
        self.__layout_search_reg_I = QHBoxLayout()
        self.__layout_search_reg_I.addWidget(self.__rbn_reg_Iyes)
        self.__layout_search_reg_I.addWidget(self.__rbn_reg_Ino)

        self.__layout_pattern = QHBoxLayout()
        self.__layout_pattern.addWidget(self.__ln_file_name)
        self.__layout_pattern.addLayout(self.__layout_search_reg_I)
        self.__layout_pattern.setContentsMargins(0, 0, 0, 0)
        self.__lab_pattern_fram.setLayout(self.__layout_pattern)

        # search path
        self.__layout_choose_path = QHBoxLayout()
        self.__layout_choose_path.addWidget(self.__ln_file_path)
        self.__layout_choose_path.addWidget(self.__pbn_file_path)
        self.__layout_choose_path.setSpacing(0)

        self.__layout_path = QHBoxLayout()
        self.__layout_path.addLayout(self.__layout_choose_path)
        self.__layout_path.addWidget(self.__rbn_search_file)
        self.__layout_path.addWidget(self.__rbn_search_content)
        self.__layout_path.setContentsMargins(0, 0, 0, 0)
        self.__lab_path_fram.setLayout(self.__layout_path)

        # search state
        self.__layout_state = QHBoxLayout()
        self.__layout_state.addWidget(self.__lab_state)
        self.__layout_state.addWidget(self.__pbn_search)
        self.__layout_state.addWidget(self.__pbn_stop)
        self.__layout_state.setContentsMargins(0, 0, 0, 10)
        self.__lab_state_fram.setLayout(self.__layout_state)

        # top layout
        self.__layout_top = QVBoxLayout()
        self.__layout_top.addWidget(self.__lab_title_fram)
        self.__layout_top.addWidget(self.__lab_mode_fram)
        self.__layout_top.addWidget(self.__lab_pattern_fram)
        self.__layout_top.addWidget(self.__lab_path_fram)
        self.__layout_top.addWidget(self.__lab_state_fram)
        self.__layout_top.addWidget(self.__tabView)
        self.__layout_top.setSpacing(10)
        self.__widget_frame.setLayout(self.__layout_top)

        self.__layout_fram = QGridLayout()
        self.__layout_fram.addWidget(self.__widget_frame, 0, 0, 1, 1)
        self.__layout_fram.setContentsMargins(0, 0, 0, 0)
        self.setLayout(self.__layout_fram)

    def _assign_object_names(self):
        """Give the styled widgets the names the style sheet selects on."""
        self.__widget_frame.setObjectName('fram')
        self.__lab_title.setObjectName('lab_title')
        self.__ln_open_tool.setObjectName('ln_open_tool')
        self.__lab_mode_fram.setObjectName('mode_fram')
        self.__ln_file_name.setObjectName('ln_pattern')
        self.__ln_file_path.setObjectName('ln_path')
        self.__lab_state.setObjectName('state')
        self.__tabView.setObjectName('tabView')
        self.__browser_result.setObjectName('browser_result')
        self.__browser_error.setObjectName('browser_error')

    def _connect_signals(self):
        """Connect button and list signals to their slots."""
        self.__pbn_file_path.clicked.connect(self.choose_path)
        self.__pbn_search.clicked.connect(self.pbn_search_clicked)
        self.__pbn_stop.clicked.connect(self.pbn_stop)
        self.__pbn_open_tool.clicked.connect(self.choose_open_tool)
        self.__browser_result.doubleClicked.connect(self.listitem_clicked)

    @staticmethod
    def _drain(source, limit=None):
        """Pop up to *limit* items from queue *source* without blocking.

        Args:
            source: the queue to read from.
            limit: largest number of items to take; ``None`` takes
                everything that is currently queued.

        Returns:
            The items taken, oldest first (possibly an empty list).
        """
        from queue import Empty

        items = []
        while limit is None or len(items) < limit:
            try:
                items.append(source.get_nowait())
            except Empty:
                break
        return items

    def _abort_search(self):
        """Mark the search as over without results and return ``False``."""
        self.__lab_state.setText('搜索已停止!')
        self.__searching = False
        return False

    def mousePressEvent(self, event):
        """Remember where inside the window a left-button drag started."""
        if event.button() == Qt.LeftButton:
            self.offset = event.globalPos() - self.pos()

    def mouseMoveEvent(self, event):
        """Move the window along with a left-button drag."""
        # offset is None when no press was recorded by this widget.
        if event.buttons() == Qt.LeftButton and self.offset is not None:
            self.move(event.globalPos() - self.offset)

    def set_open_tool(self):
        """Look for a known text editor and put its path in the tool field.

        The candidate directories are probed in order and the first
        matching executable wins.  Nothing happens on a platform for which
        no candidates are known.
        """
        # Only the linkage format matters; the original compared the whole
        # tuple against '32bit' and left the list unbound on 64-bit builds.
        linkage = platform.architecture()[1]
        if linkage == 'WindowsPE':
            possible_dir = [
                'C:\\Program Files\\Sublime Text 2', 'C:\\Sublime Text 2',
                'D:\\Program Files\\Sublime Text 2', 'D:\\Sublime Text 2',
                'E:\\Program Files\\Sublime Text 2', 'E:\\Sublime Text 2',
                'F:\\Program Files\\Sublime Text 2', 'F:\\Sublime Text 2',
                'C:\\Program Files\\Notepad++', 'C:\\notepad++',
                'D:\\Program Files\\Notepad++', 'D:\\notepad++',
                'E:\\Program Files\\Notepad++', 'E:\\notepad++',
                'F:\\Program Files\\Notepad++', 'F:\\notepad++',
                'C:\\Windows\\System32'
            ]
        elif linkage == 'ELF':
            possible_dir = ['/usr/bin']
        else:
            possible_dir = []

        wanted = ('sublime_text.exe', 'notepad++.exe', 'notepad.exe')
        for rootdir in possible_dir:
            for root, dirs, files in walk(rootdir):
                for file in files:
                    if file in wanted:
                        self.__ln_open_tool.setText(join(root, file))
                        return

    def _filename_matcher(self, filename, mode, I):
        """Return a predicate telling whether a file name matches.

        Raises:
            re.error: in regular-expression mode, when *filename* is not a
                valid pattern.
        """
        if mode == self.__search_mode['fuzzy']:
            return lambda name: filename in name
        if mode == self.__search_mode['precise']:
            return lambda name: (filename == splitext(name)[0]
                                 or filename == name)
        pattern = re.compile(filename, 0 if I else re.I)
        return lambda name: pattern.search(name) is not None

    def search_from_filename(self,
                             filepath,
                             filename,
                             mode='fuzzy_search',
                             I=True):
        """Walk *filepath* and queue every file whose name matches.

        Meant to run in a background thread.

        Args:
            filepath: directory to walk.
            filename: text or regular expression to look for.
            mode: one of the values of the search-mode table.
            I: ``True`` for a case-sensitive regular expression, ``False``
                to compile it with ``re.I``.

        Returns:
            ``False`` when the arguments are unusable, otherwise ``None``.
        """
        if (filepath == '' or not exists(filepath)
                or mode not in self.__search_mode.values()
                or filename == ''):
            # Reset the flag; otherwise the display threads never finish.
            return self._abort_search()
        try:
            matches = self._filename_matcher(filename, mode, I)
        except re.error as error:
            self.__queue_error.put("%s\n(%s)\n" % (error, filename))
            return self._abort_search()

        count = 0
        for root, dirs, files in walk(filepath):
            for each_file in files:
                if self.__thread_killer:
                    return
                if matches(each_file):
                    count += 1
                    self.__lab_state.setText('正在搜索......已搜到 %d 个文件' % count)
                    self.__queue_result.put(join(root, each_file))
                    self.__tabView.setTabText(0, '匹配结果(%d)' % count)
        self.__lab_state.setText('搜索完毕! 共搜到 %d 个文件' % count)
        self.__searching = False

    def search_from_content(self, path, content, mode='fuzzy_search', I=True):
        """Walk *path* and queue every line that contains *content*.

        Meant to run in a background thread.  For each matching file its
        path is queued once (preceded by a newline), followed by one
        ``line N: text`` entry per matching line, N counting from 1.  Files
        that cannot be read are reported on the error queue and skipped.

        Args:
            path: directory to walk.
            content: text or regular expression to look for.
            mode: one of the values of the search-mode table; every mode
                other than the regular-expression one does a substring test.
            I: ``True`` for a case-sensitive regular expression, ``False``
                to compile it with ``re.I``.

        Returns:
            ``False`` when the arguments are unusable, otherwise ``None``.
        """
        if (path == '' or not exists(path)
                or mode not in self.__search_mode.values()
                or content == ''):
            return self._abort_search()

        if mode == self.__search_mode['reg']:
            try:
                pattern = re.compile(content, 0 if I else re.I)
            except re.error as error:
                self.__queue_error.put("%s\n(%s)\n" % (error, content))
                return self._abort_search()
            suffixes = self._REG_CONTENT_SUFFIXES

            def line_matches(line):
                return pattern.search(line) is not None
        else:
            suffixes = self._PLAIN_CONTENT_SUFFIXES

            def line_matches(line):
                return content in line

        pass_file_count = 0
        error_number = 0
        match_files_count = 0
        for root, dirs, files in walk(path):
            for each_file in files:
                if not each_file.endswith(suffixes):
                    continue
                current_file = join(root, each_file)
                pass_file_count += 1
                self.__lab_state.setText('正在搜索......%s' % current_file)
                file_reported = False
                try:
                    # ``with`` closes the file even on an early return.
                    with open(current_file) as handle:
                        for line_number, line in enumerate(handle, 1):
                            if self.__thread_killer:
                                return
                            if not line_matches(line):
                                continue
                            if not file_reported:
                                self.__queue_result.put('\n%s' % current_file)
                                file_reported = True
                                match_files_count += 1
                            self.__queue_result.put(
                                'line %s: %s' % (line_number, line.strip()))
                except Exception as error:
                    # Best effort: report the unreadable file and move on.
                    self.__queue_error.put("%s\n(%s)\n" %
                                           (error, current_file))
                    pass_file_count -= 1
                    error_number += 1
                    continue
                self.__tabView.setTabText(0,
                                          '匹配结果(%d)' % match_files_count)
        self.__lab_state.setText(
            '搜索完毕!成功匹配 %d 个文件,处理 %s 个文件,失败 %s 文件。' %
            (match_files_count, pass_file_count, error_number))
        self.__searching = False

    def choose_path(self):
        """Let the user pick the directory to search."""
        path = QFileDialog.getExistingDirectory()
        if path != '':
            # The dialog returns forward slashes; use the native separator.
            path = sep.join(path.split('/'))
            self.__ln_file_path.setText(path)

    def choose_open_tool(self):
        """Let the user pick the program used to open result files."""
        path = QFileDialog.getOpenFileName()
        if path[0] != '':
            self.__ln_open_tool.setText(path[0])

    def show_search_result(self):
        """Move queued results into the result list until the search ends.

        Meant to run in a background thread.
        """
        while self.__searching or self.__queue_result.qsize():
            if self.__thread_killer:
                return
            # Never block here: the old ``qsize() >= 0`` test was always
            # true and left this thread stuck in ``get()`` once the search
            # finished with an empty queue.
            batch = self._drain(self.__queue_result, self._RESULT_BATCH)
            if batch:
                self.__browser_result.addItems(batch)
            sleep(self._POLL_INTERVAL)

    def show_error_result(self):
        """Move queued error messages into the error tab and count them.

        Meant to run in a background thread.
        """
        count = 0
        while self.__queue_error.qsize() or self.__searching:
            if self.__thread_killer:
                return
            messages = self._drain(self.__queue_error, 1)
            if not messages:
                # Sleep instead of spinning while nothing is queued.
                sleep(self._POLL_INTERVAL)
                continue
            self.__browser_error.append(messages[0])
            count += 1
            self.__tabView.setTabText(1, '错误结果(%d)' % count)

    def pbn_search_clicked(self):
        """Validate the form and start a search in background threads."""
        if self.__searching:
            # A search is already running; ignore repeated triggers.
            return

        file_path = self.__ln_file_path.text()
        file_name = self.__ln_file_name.text()
        if file_path == '':
            QMessageBox(QMessageBox.Warning, '缺少参数!', '请输入搜索路径!',
                        QMessageBox.Ok, self).exec_()
            return
        if file_name == '':
            QMessageBox(QMessageBox.Warning, '缺少参数!', '请输入匹配条件!',
                        QMessageBox.Ok, self).exec_()
            return

        if self.__rbn_reg.isChecked():
            mode = self.__search_mode['reg']
        elif self.__rbn_precise.isChecked():
            mode = self.__search_mode['precise']
        else:
            mode = self.__search_mode['fuzzy']
        # True means case-sensitive.
        I = not self.__rbn_reg_Ino.isChecked()

        self.__browser_result.clear()
        self.__browser_error.clear()
        self.__tabView.setTabText(0, '匹配结果(0)')
        self.__tabView.setTabText(1, '错误结果(0)')
        self.__searching = True
        self.__thread_killer = False

        if self.__rbn_search_file.isChecked():
            self.__lab_state.setText('正在搜索......已搜索到 0 个文件')
            target = self.search_from_filename
        else:
            self.__lab_state.setText('正在搜索......')
            target = self.search_from_content
        self.__sub_thread_search = Thread(
            target=target, args=(file_path, file_name, mode, I))
        self.__sub_thread_search.start()

        self.__sub_thread_show_result = Thread(target=self.show_search_result)
        self.__sub_thread_show_result.start()
        self.__sub_thread_show_error = Thread(target=self.show_error_result)
        self.__sub_thread_show_error.start()

    def pbn_stop(self):
        """Stop the running search and discard whatever is still queued."""
        if not self.__searching:
            return
        self.__thread_killer = True
        # Non-blocking drain: a display thread may empty a queue at the
        # same moment.
        self._drain(self.__queue_result)
        self._drain(self.__queue_error)
        self.__lab_state.setText('搜索已停止!')
        self.__searching = False

    def listitem_clicked(self):
        """Open the double-clicked result with a suitable program.

        Images are handed to the system; every other file is opened with
        the program named in the tool field.
        """
        import os
        import subprocess

        item = self.__browser_result.currentItem()
        if item is None:
            return
        file_path = item.text().strip()
        read_tool = self.__ln_open_tool.text()
        if not exists(file_path):
            QMessageBox.warning(self, '错误!',
                                '请双击文件名\n%s 不是文件或打不开!' % file_path,
                                QMessageBox.Ok)
            return
        if splitext(file_path)[1] in self._IMAGE_SUFFIXES:
            if hasattr(os, 'startfile'):
                # Windows: open with the associated application.
                os.startfile(file_path)
            else:
                # The old space-escaping was applied to the '%s' template
                # and never touched the path; quote the path instead.
                system('"%s"' % file_path)
            return
        try:
            # An argument list keeps paths with spaces intact and does not
            # block the window while the editor is open.
            subprocess.Popen([read_tool, file_path])
        except (OSError, ValueError) as error:
            QMessageBox.warning(self, '错误!', str(error), QMessageBox.Ok)
class MinicapStream:
    """Read the image stream of a minicap server into a queue.

    The stream starts with a banner (version, banner length, pid, real and
    virtual display size, orientation, quirks) followed by frames.  Each
    frame is a 4-byte little-endian length and that many bytes of image
    data; complete frames are put on ``self.picture``.
    """

    __instance = None
    __mutex = threading.Lock()

    # Names of the 32-bit little-endian banner fields stored at bytes 2..21.
    _BANNER_WORDS = ('Pid', 'RealWidth', 'RealHeight',
                     'VirtualWidth', 'VirtualHeight')

    def __init__(self, host="127.0.0.1", port=1717):
        """Store the connection settings; nothing is connected yet.

        Args:
            host: address of the minicap server.
            port: TCP port of the minicap server.
        """
        self.IP = host
        self.PORT = port
        self.Pid = 0
        self.banner = Banner()  # receives the banner header fields
        self.minicapSocket = None
        self.ReadImageStreamTask = None
        self.push = None
        self.picture = Queue()  # complete frames, oldest first
        self.data = b''

    @staticmethod
    def getBuilder():
        """Return the shared ``MinicapStream`` instance, creating it once."""
        if MinicapStream.__instance is None:
            # ``with`` releases the lock even if the constructor raises.
            with MinicapStream.__mutex:
                if MinicapStream.__instance is None:
                    MinicapStream.__instance = MinicapStream()
        return MinicapStream.__instance

    def get_d(self):
        """Print the number of frames currently queued."""
        print(self.picture.qsize())

    def run(self):
        """Start reading the stream in a background thread."""
        p1 = threading.Thread(target=self.ReadImageStream)
        p1.start()

    def _store_banner_byte(self, index, value):
        """Store banner byte number *index* (0-based) in ``self.banner``."""
        banner = self.banner
        if index == 0:
            banner.Version = value
        elif index == 1:
            banner.Length = value
        elif 2 <= index <= 21:
            word, offset = divmod(index - 2, 4)
            name = self._BANNER_WORDS[word]
            # Little-endian: later bytes are more significant.
            setattr(banner, name,
                    getattr(banner, name) + (value << (offset * 8)))
        elif index == 22:
            banner.Orientation = value * 90
        elif index == 23:
            banner.Quirks = value

    def ReadImageStream(self):
        """Connect to the server and queue frames until the stream ends.

        Returns when the server closes the connection or when a frame does
        not start with the JPEG marker ``FF D8``.  The socket is closed in
        both cases.
        """
        self.minicapSocket = socket.socket(socket.AF_INET,
                                           socket.SOCK_STREAM)
        try:
            self.minicapSocket.connect((self.IP, self.PORT))
            self._read_frames()
        finally:
            self.minicapSocket.close()

    def _read_frames(self):
        """Parse the banner and the frames arriving on the open socket."""
        readBannerBytes = 0
        bannerLength = 2  # replaced by the real length found at byte 1
        readFrameBytes = 0  # bytes of the frame-length prefix seen so far
        frameBodylength = 0  # bytes of the current frame still missing
        dataBody = bytearray()
        while True:
            chunk = self.minicapSocket.recv(4096)
            if not chunk:
                # An empty read means the peer closed the connection;
                # polling again would spin forever.
                return
            length = len(chunk)
            cursor = 0
            while cursor < length:
                if readBannerBytes < bannerLength:
                    value = chunk[cursor]
                    self._store_banner_byte(readBannerBytes, value)
                    if readBannerBytes == 1:
                        bannerLength = value
                    cursor += 1
                    readBannerBytes += 1
                    if readBannerBytes == bannerLength:
                        print(self.banner.toString())
                elif readFrameBytes < 4:
                    # The frame is prefixed by its size, little-endian.
                    frameBodylength += chunk[cursor] << (readFrameBytes * 8)
                    cursor += 1
                    readFrameBytes += 1
                else:
                    take = min(length - cursor, frameBodylength)
                    dataBody += chunk[cursor:cursor + take]
                    cursor += take
                    frameBodylength -= take
                    if frameBodylength:
                        continue  # the frame goes on in the next chunk
                    if dataBody[:2] != b'\xff\xd8':
                        return  # not a JPEG frame: give up on the stream
                    self.picture.put(bytes(dataBody))
                    readFrameBytes = 0
                    dataBody = bytearray()

    # adb shell LD_LIBRARY_PATH=/data/local/tmp /data/local/tmp/minicap -P 1200x1920@1200x1920/0
    # adb forward tcp:1313 localabstract:minicap

    def save_file(self, file_name, data):
        """Write *data* (bytes) to *file_name*, replacing the file."""
        print(file_name)
        with open(file_name, "wb") as file:
            file.write(data)
def shai_bagons_bad_ass_distributed_space_time_backprojection(
        hfr_hr_pred, lfr_hr_in, hfr_lr_in, base_folder):
    """Back-project a video by submitting it chunk by chunk as batch jobs.

    The time axis is cut into chunks.  Worker threads first write each
    chunk's inputs to disk and submit one job per chunk, then poll the jobs
    and copy each finished chunk into the output.  A chunk whose job exits
    with an error is filled with 0.5.

    Args:
        hfr_hr_pred: initial high-frame-rate, high-resolution prediction,
            or ``None``.  When given, each chunk's slice is passed to its
            job through ``--hfr_hr_pred``.
        lfr_hr_in: low-frame-rate, high-resolution input.
        hfr_lr_in: high-frame-rate, low-resolution input.
        base_folder: folder under which the per-chunk folders are created.

    All arrays are H-W-T-C numpy arrays.

    Returns:
        The assembled float32 prediction, clipped to [0, 1].  It takes its
        height and width from ``lfr_hr_in`` and its frame and channel
        counts from ``hfr_lr_in``.
    """
    _rusage('badbp init')
    if bp_debug:
        print('-badbp- hfr_hr_pred {}, lfr_hr_in {}, hfr_lr_in {}'.format(
            hfr_hr_pred.shape if hfr_hr_pred is not None else None,
            lfr_hr_in.shape if lfr_hr_in is not None else None,
            hfr_lr_in.shape if hfr_lr_in is not None else None))

    # Number of high-rate frames per low-rate frame.
    rate = hfr_lr_in.shape[2] // lfr_hr_in.shape[2]
    assert (rate > 1)
    # Low-rate frames per chunk: at least one, and few enough that a chunk
    # spans at most 24 high-rate frames.
    max_chunk_size = max(24 // rate, 1)
    tag = '{}/{}x{}x{}/'.format(base_folder, lfr_hr_in.shape[0],
                                lfr_hr_in.shape[1], hfr_lr_in.shape[2])
    num_workers = max(8, min(24, lfr_hr_in.shape[2] // max_chunk_size))

    chunk_q = Queue()  # (fr_, to_, cid) chunks waiting to be submitted
    jid_q = Queue()  # [jid, output_folder, fr_, to_] jobs to be collected
    err_q = Queue()  # ids of the jobs that failed

    # Keep the caller's prediction apart from the output buffer.  The
    # buffer used to be stored under the same name, so the workers always
    # wrote zeros as the initial prediction and the argument was ignored.
    initial_pred = hfr_hr_pred
    output = np.zeros([
        lfr_hr_in.shape[0], lfr_hr_in.shape[1], hfr_lr_in.shape[2],
        hfr_lr_in.shape[3]
    ], dtype=np.float32)
    _rusage('badbp allocating out')

    class Worker(Thread):
        """Thread that submits chunks, then gathers the finished jobs."""

        def __init__(self, inq, outq, errq):
            super(Worker, self).__init__()
            self.inq = inq
            self.outq = outq
            self.errq = errq
            self.daemon = True
            self.start()

        def run(self):
            self._submit_chunks()
            self._gather_results()

        def _submit_chunks(self):
            """Submit one job per queued chunk until a ``None`` arrives."""
            while True:
                item = self.inq.get()
                if item is None:  # "stop" signal
                    self.inq.task_done()
                    break
                fr_, to_, cid = item
                hfr_lr_folder = '{}-c{}-hfr_lr'.format(tag, cid)
                _write_chunk(hfr_lr_in[..., fr_ * rate:to_ * rate, :],
                             hfr_lr_folder)
                lfr_hr_folder = '{}-c{}-lfr_hr'.format(tag, cid)
                _write_chunk(lfr_hr_in[..., fr_:to_, :], lfr_hr_folder)
                pyargs = '--lfr_hr_in {} --hfr_lr_in {}'.format(
                    lfr_hr_folder, hfr_lr_folder)
                if initial_pred is not None:
                    hfr_hr_folder = '{}-c{}-hfr_hr_in'.format(tag, cid)
                    _write_chunk(
                        initial_pred[..., fr_ * rate:to_ * rate, :],
                        hfr_hr_folder)
                    pyargs = '{} --hfr_hr_pred {}'.format(
                        pyargs, hfr_hr_folder)
                output_folder = '{}-c{}-output'.format(tag, cid)
                pyargs = '{} --output {}'.format(pyargs, output_folder)
                jid = _submit_job_and_get_jid(pyargs, tag)
                # Queue the job before acknowledging the chunk, so that
                # chunk_q.join() implies every job is already in jid_q.
                self.outq.put([jid, output_folder, fr_, to_])
                self.inq.task_done()

        def _gather_results(self):
            """Poll submitted jobs and copy finished chunks to the output."""
            while True:
                item = self.outq.get()
                if item is None:  # "stop" signal
                    self.outq.task_done()
                    break
                jid, output_folder, fr_, to_ = item
                status = _check_job_status(jid)
                if status == 'done':
                    output[..., rate * fr_:rate * to_, :] = _read_chunk(
                        output_folder, (to_ - fr_) * rate)
                elif status == 'exit':
                    # The job failed: fill its frames with a neutral value
                    # and remember the job id.
                    output[..., rate * fr_:rate * to_, :] = 0.5
                    self.errq.put(jid)
                else:
                    # Not finished yet: queue it again before acknowledging
                    # this round, so jid_q.join() keeps waiting for it.
                    self.outq.put(item)
                self.outq.task_done()
                time.sleep(1)

    workers = [Worker(chunk_q, jid_q, err_q) for _ in range(num_workers)]
    _rusage('badbp workers started')

    cid = 0
    fr_ = 0
    while fr_ < lfr_hr_in.shape[2]:
        to_ = min(lfr_hr_in.shape[2], fr_ + max_chunk_size)
        chunk_q.put((fr_, to_, cid))
        cid += 1
        fr_ = to_
    assert (rate * fr_ == hfr_lr_in.shape[2])

    # Tell every worker to move on to the gathering stage.
    for _ in range(num_workers):
        chunk_q.put(None)
    _rusage('badbp done distributing')
    chunk_q.join()  # wait for all jobs to be submitted
    jid_q.join()  # wait for all frames to be collected
    _rusage('badbp frames collected')

    # Tell every worker to exit and wait until each has seen the signal.
    for _ in range(num_workers):
        jid_q.put(None)
    jid_q.join()

    # Remove the chunk folders only when every job succeeded, so that the
    # files of a failed run stay available for inspection.
    if err_q.qsize() == 0:
        shutil.rmtree(path=tag, ignore_errors=True)
    else:
        while True:
            try:
                err_q.get(block=False)
            except Empty:
                break
            err_q.task_done()
    _rusage('badbp done')
    return np.clip(output, 0., 1.)
class CoreScrapeThread(CoreScrape):
    """
    Core Scrape Thread.

    Uses multiple threads to request pages and parse their content.
    A valid rotator must be passed to produce each request using a new proxy
    and make it less likely to be red flagged as a bot or scraper by internet
    service providers. The user can pass a parser (CoreScrape class or custom
    class with a 'parse' method) to parse the response and avoid having to
    store the whole page for postprocessing.
    This controller also gives the user the option to set up a timer, in
    seconds, to raise a timeout. The timer is set in 'start_threads' if the
    user provided an integer to param 'timeout'. The timer is unset in
    'wait_for_threads'.

    Params:
        nthreads: int. Desired number of threads. Once 'start_threads' is
            called, the controller tries to split the given input into
            'nthreads' chunks. If that is not possible, the actual number of
            threads is available in 'actualnthreads'.

        rotator: corescrape.proxy.Rotator (preferably). Used to make
            requests with different proxies and user agents. The 'requests'
            module can be passed instead, but that is not advised as the
            control of proxies and user-agents is then not automatic.

        parser: corescrape.pgparser.SimpleParser, based on or None. Used to
            parse the page content and extract the useful information,
            which is less memory expensive. If no argument is given, the
            thread controller returns a list of the full pages collected.

        timeout: int or None. Time in seconds after which an alarm raises
            an event and stops the threads.

        logoperator: corescrape.logs.LogOperator or None. Log to be fed with
            process runtime information.
    """

    def __init__(self, nthreads, rotator, parser=None, timeout=None,
                 logoperator=None):
        """Constructor.

        Raises:
            TypeError: when 'timeout' is neither an int nor None.
        """

        if timeout is not None and not isinstance(timeout, int):
            raise TypeError("Param. 'timeout' must be 'int' or 'NoneType'")

        # inputs
        self.nthreads = nthreads
        self.actualnthreads = nthreads
        self.rotator = rotator
        self.parser = parser
        self.timeout = timeout  # CAREFUL! This is not timeout for requests
        self.timeoutset = False

        # control attrs
        self.queue = Queue()  # one list of results per finished thread
        # One token per thread that has finished iterating in this run.
        self.__finished = Queue()
        self.event = corescrape_event.CoreScrapeEvent(logoperator=logoperator)
        self.threads = []

        super().__init__(logoperator=logoperator)

    def __split(self, a):
        """
        Tries to split the input into chunks for each thread.
        Input must be a list.

        Returns the non-empty chunks and stores their number in
        'actualnthreads'.
        """

        if not isinstance(a, list):
            raise TypeError("Param 'a' must be 'list'")

        n = self.nthreads  # desired number of threads
        # The first 'm' chunks get one extra item each.
        k, m = divmod(len(a), n)
        split = [a[i * k + min(i, m):(i + 1) * k + min(i + 1, m)] for i in range(n)]
        split = [part for part in split if part]  # drops empty chunks

        # actual number of threads. Sometimes differs from 'nthreads'
        self.actualnthreads = len(split)

        return split

    def __warn_wait_threads(self):
        """Produce warning to wait for threads if needed."""

        if self.threads:
            warn(
                'There are threads running. Wait for them to stop before calling '
                'this method'
            )
            return True
        return False

    def __set_timeout(self):
        """
        If seconds for timeout were informed in the constructor, will set an alarm
        for timeout. Once timeout is reached, the iteration is broken and return
        as expected.
        """

        if self.timeout:
            signal.signal(signal.SIGALRM, alarm_handler)
            signal.alarm(self.timeout)
            self.log('CoreScrapeThread set the timeout for {} seconds.'.format(
                self.timeout), tmsg='info')
            self.timeoutset = True

    def __disarm_timeout(self):
        """Turn off the timeout."""

        if self.timeoutset:
            self.timeoutset = False
            signal.alarm(0)
            self.log('CoreScrapeThread disarmed the timeout.', tmsg='info')

    def __check_am_i_the_last(self):
        """Check if this thread is the last and if it should set an event."""

        # Each thread leaves exactly one token.  Counting tokens, instead of
        # entries of the result queue, is not skewed by results left over
        # from an earlier run, and two threads finishing at the same moment
        # cannot both conclude that another thread is still running.
        self.__finished.put(True)
        condition = self.__finished.qsize() >= self.actualnthreads
        condition = condition and self.event.state.is_EXECUTING()
        if condition:
            self.event.state.set_DUTY_FREE()

    def __iterate(self, threadid, data, *args):
        """Do iterations in threads, each one calling the passed code."""

        # pylint: disable=unused-argument

        self.log('Starting iteration in threadid {} for {} items'.format(
            threadid, len(data)))
        res = []
        try:
            for url in data:
                # the reason here does not matter. If it is set, break out
                if self.event.is_set():
                    break

                try:
                    page = self.rotator.request(url, self.event, threadid=threadid)
                except Exception:
                    self.event.state.set_ABORT_THREAD()
                    break

                if page is None:
                    continue  # not able to retrieve the page

                if self.parser is None:
                    res.append(page)
                    self.log('Storing whole response for {}. Thread {}'.format(
                        url, threadid))
                elif page.status_code == 404:
                    self.log('URL {} returned a 404. Thread {}'.format(url, threadid),
                             tmsg='warning')
                    res.append({url: None})  # points it was collected but useless
                else:
                    _res = self.parser.parse(page, threadid=threadid)
                    if not _res:
                        self.log('URL {} could not be parsed. Thread {}'.format(
                            url, threadid))
                        continue  # no info collected, must go on
                    self.log('URL {} collected. Thread {}'.format(url, threadid),
                             tmsg='header')
                    res.append({url: _res})
        finally:
            # Always report in, even when the parser raised; otherwise the
            # controller would keep waiting for this thread.
            self.__check_am_i_the_last()
        return res

    def start_threads(self, to_split_params, *fixed_args):
        """Starts threads.

        Params:
            to_split_params: list of URLs, each starting with 'http://' or
                'https://'; it is split among the threads.
            fixed_args: extra arguments handed unchanged to every thread.

        Returns False (after a warning) when threads from an earlier call
        are still registered, True otherwise.

        Raises:
            ValueError: when an item does not start with a protocol.
            TypeError: when 'to_split_params' is not a list.
        """

        def test_if_urls(p):
            return [a.startswith('http://') or a.startswith('https://') for a in p]

        # pylint: disable=no-value-for-parameter

        abort = self.__warn_wait_threads()
        if abort:
            return False

        if not all(test_if_urls(to_split_params)):
            raise ValueError('List of strings must begin with protocol')

        # Split before touching any state so that bad input changes nothing.
        chunks = self.__split(to_split_params)

        self.log('Starting threads for {} items'.format(len(to_split_params)))

        self.threads = []
        self.__finished = Queue()
        self.event.state.set_EXECUTING()

        if not chunks:
            # Nothing to fetch: no thread will ever report in, so signal
            # completion right away instead of leaving waiters blocked.
            self.event.state.set_DUTY_FREE()

        for threadid, split in enumerate(chunks):
            pargs = (threadid, split, *fixed_args)
            thread = Thread(
                target=lambda q, *args: q.put(self.__iterate(*args)),
                args=(self.queue, *pargs)
            )
            thread.start()
            self.threads.append(thread)

        self.__set_timeout()
        return True

    def wait_for_threads(self):
        """Wait lock for threads."""

        try:
            self.event.wait()
        except KeyboardInterrupt:
            self.event.state.set_ABORT_USER()
        except CoreScrapeTimeout:
            self.event.state.set_TIMEOUT()
        finally:
            self.__disarm_timeout()
            for thread in self.threads:
                thread.join()
            self.event.clear()
            self.threads = []

    def join_responses(self):
        """Join responses from the threads.

        Returns an empty list (after a warning) while threads are still
        registered.
        """

        abort = self.__warn_wait_threads()
        if abort:
            return []

        res = []
        while not self.queue.empty():
            res += self.queue.get()
        return res

    def is_sentenced(self):
        """
        Informs if the thread controller is sentenced due to the last event state.
        """

        sentenced = self.event.state.is_sentenced()
        if sentenced:
            self.event.state.set_FINISHED()
        return sentenced
class IPReverse():
    """Interactive, multi-threaded reverse-IP lookup via api.hackertarget.com.

    Constructing an instance prompts on stdin for the thread count, the proxy
    file, the IP list file and the proxy type, loads both files into queues
    and immediately starts the worker, CPM-counter, writer and printer threads.
    """

    def __init__(self):
        self.Threadcount = int(input("Threads?\n"))
        os.system("cls")
        self.ProxyLoc = input("Proxyfile?\n")
        os.system("cls")
        self.IPListLoc = input("IPList?\n")
        os.system("cls")
        choose = input("[1]HTTP/S\n[2]Socks4\n[3]Socks5\n")
        os.system("cls")
        # input() returns a string, so the choice is matched as text.
        self.Proxypre = self._proxy_prefix(choose)
        # Honour the paths the user typed; a blank answer falls back to the
        # historical default file names.
        ip_lines = self._read_lines(self.IPListLoc, "ips.txt", "Invalid IPFile!")
        self.proxys = Queue()
        self.IPs = Queue()
        self.PrintList = []
        self.currentproxy = ""
        self.ProxyCount = 0
        self.Count = 0
        self.Checked = 0
        self.Hits = 0
        self.Invalid = 0
        self.Domains = 0
        self.WriteQueue = Queue()
        self.CPM = 0
        proxy_lines = self._read_lines(
            self.ProxyLoc, "proxy.txt", "Invalid Proxyfile!")
        for line in ip_lines:
            self.IPs.put(line.strip())
        for line in proxy_lines:
            self.proxys.put(self.Proxypre + line.strip())
        for _ in range(self.Threadcount):
            threading.Thread(target=self.Thread).start()
        threading.Thread(target=self.CPMCounter).start()
        threading.Thread(target=self.Writer).start()
        threading.Thread(target=self.Printer).start()

    @staticmethod
    def _proxy_prefix(choice):
        """Map the menu answer to a proxy URL scheme prefix.

        "1" selects plain HTTP/S (no prefix), "2" selects SOCKS4 and every
        other answer selects SOCKS5.
        """
        choice = str(choice).strip()
        if choice == "1":
            return ""
        if choice == "2":
            return "socks4://"
        return "socks5://"

    @staticmethod
    def _read_lines(path, default, error_message):
        """Return the lines of *path* (or *default* when *path* is blank).

        On failure *error_message* is printed and an empty list is returned,
        so the caller can carry on with nothing to process.
        """
        try:
            with open(path or default, "r") as handle:
                return handle.readlines()
        except OSError:
            print(error_message)
            return []

    def CPMCounter(self):
        """Once per second, extrapolate checks-per-minute from the last second."""
        while True:
            old = self.Checked
            time.sleep(1)
            new = self.Checked
            self.CPM = int(new - old) * 60

    def Lookup(self, ip, proxy):
        """Query the reverse-IP API for *ip* through *proxy*.

        Returns the response text, or False when the request fails.
        """
        try:
            resp = requests.get(
                "https://api.hackertarget.com/reverseiplookup/?q=" + ip,
                proxies={"https": proxy},
                timeout=3)
        except Exception:
            # Best effort: any request failure means "retry later".
            return False
        return resp.text

    def Writer(self):
        """Append every domain taken from WriteQueue to Output.txt."""
        while True:
            try:
                curip = self.WriteQueue.get(timeout=1)
            except Exception:
                # Nothing queued within the timeout; poll again.
                continue
            with open("Output.txt", "a+") as out:
                out.write(curip + "\n")

    def Printer(self):
        """Refresh the console title and hit list; rotate the proxy after 10 uses."""
        while True:
            ctypes.windll.kernel32.SetConsoleTitleW(
                "Reverse IP Lookup | Made by Nezuko | Proxys remaining: "
                + str(self.proxys.qsize())
                + " | IPs remaining: " + str(self.IPs.qsize())
                + " | Hits: " + str(self.Hits)
                + " | Found Domains: " + str(self.Domains)
                + " | CPM: " + str(self.CPM))
            print("".join(entry + "\n" for entry in self.PrintList))
            if self.ProxyCount >= 10:
                self.currentproxy = self.proxys.get()
                self.ProxyCount = 0
            time.sleep(0.1)
            os.system("cls")

    def Thread(self):
        """Worker loop: take an IP, look it up and record the outcome."""
        while True:
            try:
                curip = self.IPs.get(timeout=1)
            except Exception:
                # Queue empty within the timeout; poll again.
                continue
            if self.ProxyCount == 10:
                # Give the printer thread a moment to rotate the proxy.
                time.sleep(0.3)
            self.ProxyCount += 1
            resp = self.Lookup(curip, self.currentproxy)
            self.Checked += 1
            if resp is False or "429 Too Many Requests" in resp:
                # Failed or rate limited: put the IP back for another attempt.
                self.IPs.put(curip)
                continue
            if ("No DNS A records found for " in resp
                    or resp == "error check your search parameter"):
                self.Invalid += 1
            else:
                # Checked was already incremented above; counting the hit a
                # second time inflated the CPM figure.
                self.Hits += 1
                domains = resp.split("\n")
                domains.pop(0)
                for m in domains:
                    self.Domains += 1
                    self.WriteQueue.put(m.strip())
                self.PrintList.append(
                    colorama.Fore.GREEN + "Found Domain for " + curip + "!"
                    + colorama.Style.RESET_ALL)
class Brutedomain:
    """Brute-force subdomain enumerator driven by dictionary files.

    Candidate names are resolved in blocks with gevent, wildcard-like results
    are filtered out, deeper levels are generated from found names, and the
    results are written to CSV files under ``result/<domain>/``.
    """

    def __init__(self, args):
        self.target_domain = args.domain
        # Validate before touching the filesystem so a missing domain does
        # not leave a stray result directory behind.
        if not (self.target_domain):
            print('usage: brutedns.py -h')
            sys.exit(1)
        self.check_env()
        self.cname_flag = args.cname
        self.level = args.level
        self.sub_dict = args.sub_file
        self.speed = args.speed
        # Equality, not identity: `is "y"` only works by interning accident.
        self.default_dns = args.default_dns == "y"
        self.next_sub_dict = args.next_sub_file
        self.other_result = args.other_file
        self.timeout = 10
        self.resolver = dns.resolver.Resolver(configure=self.default_dns)
        self.resolver.lifetime = self.timeout
        self.resolver.timeout = self.timeout
        self.found_count = 0
        # NOTE(review): this overwrites the value check_env() just stored in
        # self.cmdline; kept as in the original - confirm which is intended.
        self.cmdline = ""
        self.queues = Queue()
        self.queue_sub = Queue()
        self.cdn_set = set()
        self.cname_set = set()
        self.white_filter_subdomain = set()
        self.cname_block_dict = dict()
        self.ip_block_dict = dict()
        self.ip_all_dict = dict()
        self.ip_flag_dict = dict()
        self.active_ip_dict = dict()
        self.ip_count_dict = dict()
        self.black_ip_dict = dict()
        self.ip_subdomain_dict = dict()
        self.set_next_sub = self.load_next_sub_dict()
        self.set_cdn = self.load_cdn()
        self.load_sub_dict_to_queue()
        self.extract_next_sub_log()
        self.segment_num = self.judge_speed(self.speed)
        if not self.default_dns:
            self.nameservers = self.load_nameservers()
            self.check_nameservers()

    def check_env(self):
        """Prepare the result directory and rotate result files of a previous run."""
        result_dir = 'result/{domain}'.format(domain=self.target_domain)
        if not os.path.exists(result_dir):
            try:
                os.makedirs(result_dir)
            except Exception as e:
                print(e)
                sys.exit(1)
        filename = 'result/{name}/{name}'.format(name=self.target_domain)
        # Must be initialised: the second branch tests it even when the
        # first branch did not run.
        new_filename = None
        if os.path.isfile(filename + ".csv"):
            new_filename = filename + "_" + str(
                os.stat(filename + ".csv").st_mtime).replace(".", "")
            os.rename(filename + ".csv", new_filename + ".csv")
        if os.path.isfile(filename + "_deal.csv"):
            if not new_filename:
                new_filename = filename + "_" + str(
                    os.stat(filename + "_deal.csv").st_mtime).replace(".", "")
            os.rename(filename + "_deal.csv", new_filename + "_deal.csv")
        if (platform.system() != "Windows"):
            try:
                self.cmdline = "\r\n"
                os.system("ulimit -n 65535")
            except Exception:
                pass
        else:
            self.cmdline = "\r"

    def load_cdn(self):
        """Return the set of CDN markers listed in dict/cdn_servers.txt."""
        with open('dict/cdn_servers.txt', 'r') as file_cdn:
            return {cname.strip() for cname in file_cdn}

    def load_next_sub_dict(self):
        """Return the set of labels used to build deeper-level subdomains."""
        with open(self.next_sub_dict, 'r') as file_next_sub:
            return {next_sub.strip() for next_sub in file_next_sub}

    def load_sub_dict_to_queue(self):
        """Queue ``<label>.<target_domain>`` for every line of the sub dictionary."""
        with open(self.sub_dict, 'r') as file_sub:
            for sub in file_sub:
                domain = "{sub}.{target_domain}".format(
                    sub=sub.strip(), target_domain=self.target_domain)
                self.queues.put(domain)

    def load_nameservers(self):
        """Return the set of nameservers listed in dict/name_servers.txt."""
        with open('dict/name_servers.txt', 'r') as nameservers:
            return {nameserver.strip() for nameserver in nameservers}

    def load_result_from_other(self):
        """Return subdomains supplied by another tool.

        ``other_result`` may be a file path (one name per line), a list of
        names, or anything else (treated as "no extra results").
        """
        if isinstance(self.other_result, str):
            try:
                with open('{target_domain}'.format(
                        target_domain=self.other_result), 'r') as subdomain_log:
                    other_result = [
                        subdomain.strip() for subdomain in subdomain_log
                    ]
            except Exception:
                print('subdomain log is not exist')
                sys.exit(1)
        elif isinstance(self.other_result, list):
            other_result = self.other_result
        else:
            other_result = []
        return [subdomain.strip().strip(".") for subdomain in other_result]

    def extract_next_sub_log(self):
        """Queue externally supplied subdomains and harvest their labels."""
        for subdomain in self.load_result_from_other():
            # NOTE(review): nesting reconstructed from collapsed source; the
            # label harvesting is assumed to apply to target subdomains only.
            if (('.' + str(self.target_domain)) in subdomain):
                self.queues.put(subdomain)
                subname = subdomain.strip(".").replace(
                    self.target_domain, "").strip(".")
                if subname != "":
                    for sub in subname.split("."):
                        self.set_next_sub.add(sub.strip())

    def check_nameservers(self):
        """Time every configured nameserver and switch the resolver to the fastest."""
        print("[+] Seraching fastest nameserver,it will take a few minutes")
        total = len(self.nameservers)
        if total == 0:
            # Nothing to measure; avoids dividing by zero below.
            print("[-] No nameserver loaded, skip nameserver check")
            return
        server_info = {}
        i = 0
        sys.stdout.write(self.cmdline + '[+] Searching nameserver process:' +
                         str(round(i * 100.00 / total, 2)) + "% ")
        sys.stdout.flush()
        for nameserver in self.nameservers:
            i = i + 1
            self.resolver.nameservers = [nameserver]
            self.resolver.lifetime = 3
            start = time.time()
            for _ in range(2):
                random_str = str(random.randint(1, 1000))
                domain_list = [
                    random_str + "testnamservspeed.com" for _ in range(200)
                ]
                coroutines = [
                    gevent.spawn(self.query_domain, l) for l in domain_list
                ]
                gevent.joinall(coroutines)
            server_info[nameserver] = time.time() - start
            sys.stdout.write('\r' + '[+] Searching nameserver process:' +
                             str(round(i * 100.00 / total, 2)) + "% ")
            sys.stdout.flush()
        nameserver = min(server_info, key=server_info.get)
        print(self.cmdline)
        print("[+] Search completed,fastest nameserver: " + nameserver)
        # Drop whatever the timing queries recorded.
        self.ip_block_dict = dict()
        self.cname_block_dict = dict()
        self.resolver.lifetime = self.timeout
        self.resolver.nameservers = [nameserver]

    def check_cdn(self, cname_list, domain):
        """Return True when any CNAME in *cname_list* looks like a CDN.

        Non-CDN names are collected in ``cname_set``; newly recognised CDN
        names are collected in ``cdn_set``.
        """
        for cname in cname_list:
            cname = cname.lower().rstrip(".")
            domain = domain.lower()
            for cdn in self.set_cdn:
                if (cdn in cname):
                    return True
            if (domain in cname):
                parts = cname.split(domain)
                if (parts[1] != ""):
                    self.cdn_set.add(parts[1].strip("."))
                    return True
            elif ('cdn' in cname or 'cache' in cname):
                self.cdn_set.add(cname)
                return True
            self.cname_set.add(cname)
        return False

    def get_type_id(self, name):
        """Return the numeric DNS record type for *name* (e.g. 'A')."""
        return dns.rdatatype.from_text(name)

    def query_domain(self, domain):
        """Resolve *domain* and record its A / CNAME answers.

        A timeout re-queues the domain; any other failure is treated as
        "does not resolve" and ignored.
        """
        list_ip, list_cname = [], []
        try:
            record = self.resolver.query(domain)
            for A_CNAME in record.response.answer:
                for item in A_CNAME.items:
                    if item.rdtype == self.get_type_id('A'):
                        list_ip.append(str(item))
                        self.ip_block_dict[domain] = list_ip
                    elif (item.rdtype == self.get_type_id('CNAME')):
                        list_cname.append(str(item))
                        self.cname_block_dict[domain] = list_cname
        except dns.exception.Timeout:
            self.queues.put(domain)
        except Exception:
            # Deliberate best effort: NXDOMAIN and friends are expected.
            pass

    def get_block(self):
        """Pop up to ``segment_num`` domains from the work queue."""
        count = min(self.queues.qsize(), self.segment_num)
        return [self.queues.get() for _ in range(count)]

    def get_black_subdomain(self):
        """Return every next-level name below a blacklisted subdomain."""
        temp_list = list()
        temp_set = set()
        for subdomain_list in self.black_ip_dict.values():
            temp_list.extend(subdomain_list)
        black_subdomain = set(temp_list) - self.white_filter_subdomain
        for domain in black_subdomain:
            for next_sub in self.set_next_sub:
                temp_set.add("{next}.{domain}".format(next=next_sub,
                                                      domain=domain))
        return temp_set

    def judge_speed(self, speed):
        """Translate the speed option into a block size from ``config``."""
        if (speed == "low"):
            return config.low_segment_num
        if (speed == "high"):
            return config.high_segment_num
        return config.medium_segment_num

    def generate_sub(self):
        """Expand one found subdomain into next-level candidates.

        Returns False when nothing could be taken or queued.
        """
        try:
            domain = self.queue_sub.get_nowait()
            for next_sub in self.set_next_sub:
                subdomain = "{next}.{domain}".format(next=next_sub.strip(),
                                                     domain=domain)
                self.queues.put_nowait(subdomain)
            return True
        except Exception:
            return False

    def deweighting_subdomain(self):
        """Filter the latest block of results and merge it into ``ip_all_dict``.

        Subdomains are dropped when their IP set is blacklisted, seen too
        often, listed in ``config.waiting_fliter_ip`` or not public.
        """
        temp_list = list()
        for subdomain, ip_list in self.ip_block_dict.items():
            ip_str = str(sorted(ip_list))
            if ip_str not in self.black_ip_dict:
                if ip_str in self.ip_count_dict:
                    self.ip_subdomain_dict[ip_str].append(subdomain)
                    if (self.ip_count_dict[ip_str] > config.ip_max_count):
                        temp_list.append(subdomain)
                    else:
                        self.ip_count_dict[
                            ip_str] = self.ip_count_dict[ip_str] + 1
                else:
                    self.ip_subdomain_dict[ip_str] = [subdomain]
                    self.ip_count_dict[ip_str] = 1
                for ip in ip_list:
                    if ip in config.waiting_fliter_ip:
                        temp_list.append(subdomain)
                    if (IP(ip).iptype() != 'PUBLIC'):
                        temp_list.append(subdomain)
            else:
                temp_list.append(subdomain)
        for ip_str, count in self.ip_count_dict.items():
            if (count > 10):
                i = 0
                subdomain_list = self.ip_subdomain_dict[ip_str]
                # Shortest name; on equal length the later one wins.
                min_subdomain = reduce(
                    lambda x, y: x if len(x) < len(y) else y, subdomain_list)
                for subdomain in subdomain_list:
                    if ("." + min_subdomain in subdomain):
                        i = i + 1
                    if (i > 10):
                        self.black_ip_dict[ip_str] = subdomain_list
                        break
        for subdomain_list in self.black_ip_dict.values():
            temp_list.extend(subdomain_list)
        for subdomain in temp_list:
            self.ip_all_dict.pop(subdomain, None)
            if subdomain in self.cname_block_dict:
                del self.cname_block_dict[subdomain]
                self.white_filter_subdomain.add(subdomain)
            self.ip_block_dict.pop(subdomain, None)
        self.found_count = len(self.ip_all_dict)
        self.ip_all_dict.update(self.ip_block_dict)
        for subdomain in self.ip_block_dict:
            if (subdomain.count(".") < self.level):
                self.queue_sub.put(subdomain)
        self.ip_block_dict.clear()

    def handle_data(self):
        """Tag CNAME lists with a "Yes"/"No" CDN flag and build /24 statistics."""
        for subdomain, cname_list in self.cname_block_dict.items():
            if (self.check_cdn(cname_list, self.target_domain)):
                cname_list.append("Yes")
            else:
                cname_list.append("No")
            self.cname_block_dict[subdomain] = cname_list
        for subdomain, ip_list in self.ip_all_dict.items():
            for ip in ip_list:
                iptype = IP(ip).iptype()
                if (iptype != 'PUBLIC'):
                    self.ip_all_dict[subdomain] = "{iptype}({ip})".format(
                        iptype=iptype, ip=ip)
                else:
                    try:
                        key_yes = self.cname_block_dict[subdomain][-1]
                    except KeyError:
                        key_yes = "No"
                    if (key_yes == "No"):
                        CIP = (IP(ip).make_net("255.255.255.0"))
                        if CIP in self.ip_flag_dict:
                            self.ip_flag_dict[CIP] = self.ip_flag_dict[CIP] + 1
                        else:
                            self.ip_flag_dict[CIP] = 1
                        if CIP in self.active_ip_dict:
                            active_ip_list = self.active_ip_dict[CIP]
                            if (ip not in active_ip_list):
                                active_ip_list.append(ip)
                        else:
                            active_ip_list = [ip]
                        self.active_ip_dict[CIP] = active_ip_list

    def raw_write_disk(self):
        """Append all found subdomains to ``result/<domain>/<domain>.csv``."""
        result_dir = 'result/{domain}'.format(domain=self.target_domain)
        if not os.path.exists(result_dir):
            os.makedirs(result_dir)
        with open('result/{name}/{name}.csv'.format(name=self.target_domain),
                  'a') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow(['DOMAIN', 'CDN', "CNAME", 'IP'])
            for subdomain, ip_list in self.ip_all_dict.items():
                # handle_data() appends the CDN flag as the last list item.
                # The original looked it up in an attribute that is never
                # defined, so every row came out as "No" / "Null".
                tagged = self.cname_block_dict.get(subdomain)
                if tagged:
                    flag = tagged[-1]
                    cname_list = tagged[:-1]
                else:
                    flag = "No"
                    cname_list = "Null"
                writer.writerow([subdomain, flag, cname_list, ip_list])
        self.ip_all_dict.clear()
        self.cname_block_dict.clear()

    def deal_write_disk(self):
        """Write the /24 frequency table, most frequent network first."""
        ip_flags = sorted(self.ip_flag_dict.items(),
                          key=lambda d: d[1],
                          reverse=True)
        with open(
                'result/{name}/{name}_deal.csv'.format(
                    name=self.target_domain), 'w') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow(['IP', 'frequency', 'active'])
            for network, frequency in ip_flags:
                writer.writerow(
                    [network, frequency, self.active_ip_dict[network]])

    def collect_cname(self):
        """Append unknown CNAMEs and discovered CDN names to the shared logs."""
        with open('result/cname.txt', 'a') as txt:
            for cname in self.cname_set:
                known = any(cdn in cname or self.target_domain in cname
                            for cdn in self.set_cdn)
                if not known:
                    txt.write('{cname}'.format(cname=cname.strip()) +
                              self.cmdline)
        with open('result/cdn.txt', 'a') as txt:
            for cdn in self.cdn_set:
                txt.write('{cname}'.format(cname=cdn) + self.cmdline)

    def cmd_print(self, wait_size, start, end, i):
        """Rewrite the single-line progress report on stdout."""
        scaned = self.segment_num * i
        cost = end - start
        # Guard against a zero elapsed time on the very first block.
        velocity = round(scaned / cost, 1) if cost else 0.0
        sys.stdout.write(
            "\r" +
            "[+] Bruting subdomain process domain: {domain} |scaned: {scaned}|found: {found_count} |speed:{velocity} |spend: {spend} min "
            .format(domain=self.target_domain,
                    scaned=scaned,
                    qsize=wait_size,
                    found_count=self.found_count,
                    velocity=velocity,
                    spend=round(cost / 60, 1)))
        sys.stdout.flush()

    def run(self):
        """Drain the work queues block by block, then write all reports."""
        start = time.time()
        print("[+] Begin to brute domain")
        i = 0
        while not self.queues.empty() or not self.queue_sub.empty():
            i = i + 1
            domain_list = set(self.get_block()) - self.get_black_subdomain()
            coroutines = [
                gevent.spawn(self.query_domain, l) for l in domain_list
            ]
            try:
                gevent.joinall(coroutines)
            except KeyboardInterrupt:
                print('user stop')
                sys.exit(1)
            self.deweighting_subdomain()
            self.cmd_print(self.queues.qsize(), start, time.time(), i)
            # Refill with next-level candidates once the queue runs low.
            if (self.queues.qsize() < 30000 and self.queue_sub.qsize() > 0):
                while (self.queues.qsize() < 200000):
                    if not self.generate_sub():
                        break
        self.handle_data()
        self.raw_write_disk()
        self.deal_write_disk()
        self.collect_cname()
        print(self.cmdline)
        print("[+] Brute over")
class Round_Button(tk.Label):
    def __init__(self, top, text, size, static_colour, static_t_colour, transformation_colour, transformation_t_colour, background:str='#FFFFFF', static_outline=None, trans_outline=None):
        '''
        Rounded, hover-animated button drawn with PIL and shown in a Label.

        :param top: Top level / root. The window in which the button is going to be placed. [Tkinter Object]
        :param text: Text that is placed on the button. [String]
        :param size: Multiplier for the size. [Integer]
        :param static_colour: Colour for the button when static. [Tuple,(R,G,B)]
        :param static_t_colour: Colour for the text when the button is static. [Tuple,(R,G,B)]
        :param transformation_colour: Colour for the button when cursor is over it. [Tuple,(R,G,B)]
        :param transformation_t_colour: Colour for the text when the cursor is over the button. [Tuple,(R,G,B)]
        :param background: Background colour of the Label so the button can blend with the window's
                           background. Defaults to WHITE (#FFFFFF)
        :param static_outline: outline colour of static image. [Tuple, (RGB)] Defaults to static_colour value.
        :param trans_outline: outline colour of transformed image. [Tuple, (RGB)] Defaults to
                              transformation_colour value.
        '''
        ## Initialisation
        ## ==============
        tk.Label.__init__(self, top)  # Inherits the features of a label
        self.sc = static_colour
        self.tc = transformation_colour
        self.tsc = static_t_colour
        self.ttc = transformation_t_colour
        self.multi = size
        self.resoltuion = (int(35*size), int(10*size))  # 3.5 : 1 (W : H)
        self.text = text
        self.change_to_trans = False
        self.change_to_static = False
        self.static_outline = static_outline
        self.trans_outline = trans_outline
        if static_outline is None:
            self.static_outline = static_colour
        if trans_outline is None:
            self.trans_outline = transformation_colour
        self.create_custom_image()  # Create static and transformed buttons
        self.create_lower_button()  # Creates Lower Button
        self.connect_function()
        self.configure(image=self.Images[9])  # Inserts static button images
        self.configure(background=background)
        # The queue must exist before the hover handlers can fire.
        self.queue = Queue()
        self.bind("<Enter>", self.on_enter)  # Hover on capabilities
        self.bind("<Leave>", self.on_leave)  # Hover off capabilities
        self.Animator = Thread(target=self.Manage_Animation)
        # The animator blocks forever on the queue; as a daemon it cannot
        # keep the interpreter alive after the window is closed.
        self.Animator.daemon = True
        self.Animator.start()

    def _fit_text(self, scale, max_height):
        # Shrinks the font one point at a time until the wrapped text fits
        # into max_height; returns (font, coords, lines, line_height).
        decrement = -1
        while True:
            # < decrement > : Used for lowering the font size so that the text doesn't go off the screen.
            decrement += 1
            font_size = int(5.5 * scale) - decrement
            font = ImageFont.truetype("Assets/GentiumBasic-Bold.ttf", font_size, encoding="unic")
            coords, Lines, line_height = self.draw_multiple_line_text(
                self.text, font, int(36 * scale), int(2 * scale), 12)
            # Empty text has no coordinates and trivially fits.
            fits = not coords or coords[-1][1] + line_height + 5 <= max_height
            # Stop at size 1: a size of 0 would make truetype() fail.
            if fits or font_size <= 1:
                return font, coords, Lines, line_height

    def create_custom_image(self):
        # Renders the 10 animation frames: index 0 is the fully transformed
        # (hover) look, index 9 is closest to the static look.
        font, coords, Lines, line_height = self._fit_text(self.multi, self.resoltuion[1])
        self.images = [Image.new('RGBA', (self.resoltuion)) for i in range(10)]
        # Initialising the draw the ImageDraw.Draw object
        self.image_drawer = [ImageDraw.Draw(self.images[i]) for i in range(10)]
        self.image_colours = [[self.tc[i] + ((self.sc[i]-self.tc[i])//10)*x for i in range(3)] for x in range(10)]
        self.text_colours = [[self.ttc[i] + ((self.tsc[i] - self.ttc[i]) // 10) * x for i in range(3)] for x in range(10)]
        self.outline_colours = [[self.trans_outline[i] + ((self.static_outline[i] - self.trans_outline[i]) // 10) * x for i in range(3)] for x in range(10)]
        for i in range(10):
            # Puts the colours in a tuple for use.
            colour = (self.image_colours[i][0], self.image_colours[i][1], self.image_colours[i][2])
            textcolour = (self.text_colours[i][0], self.text_colours[i][1], self.text_colours[i][2])
            outline = (self.outline_colours[i][0], self.outline_colours[i][1], self.outline_colours[i][2])
            # Creates the base for both images (Rectangles)
            self.image_drawer[i].rectangle((int(5.5 * self.multi), 0, self.resoltuion[0] - int(5.5 * self.multi), self.resoltuion[1]-1), outline=outline, width=2, fill=colour)
            # Create a rectangle to remove the unwanted areas of colour, and adds an elipses to give a round effect.
            # 2 on both sides for 2 images.
            self.image_drawer[i].rectangle((self.resoltuion[0] - int(5.5 * self.multi), 0, self.resoltuion[0], self.resoltuion[1]-2), fill=(0, 0, 0, 0))
            self.image_drawer[i].ellipse((self.resoltuion[0] - int(10 * self.multi), 0, self.resoltuion[0]-1, self.resoltuion[1]-2), outline=outline, width=2, fill=colour)
            self.image_drawer[i].rectangle((0, 0, int(5.5 * self.multi), int(10 * self.multi)-2), fill=(0, 0, 0, 0))
            self.image_drawer[i].ellipse((0, 0, int(10 * self.multi), int(10 * self.multi)-2), outline=outline, width=2, fill=(colour))
            self.image_drawer[i].rectangle((int(5.5 * self.multi), 2, self.resoltuion[0] - int(5.5 * self.multi), self.resoltuion[1]-3), fill=colour)
            for x in range(len(coords)):
                self.image_drawer[i].text(coords[x], Lines[x], fill=textcolour, font=font, align='center')
        self.Images = [ImageTk.PhotoImage(self.images[i]) for i in range(10)]

    def create_lower_button(self):
        # Renders the slightly smaller "pressed" image, centred on a
        # transparent canvas of the normal resolution.
        multi_d = 0.25
        multi = self.multi - multi_d
        resoltuion = (int(35 * multi), int(10*multi))
        font, coords, Lines, line_height = self._fit_text(multi, self.resoltuion[1]-(10*multi_d))
        self.lower_button = Image.new('RGBA', (resoltuion))
        # Initialising the draw the ImageDraw.Draw object
        self.lower_drawer = ImageDraw.Draw(self.lower_button)
        colour = (self.image_colours[0][0], self.image_colours[0][1], self.image_colours[0][2])
        textcolour = (self.text_colours[0][0], self.text_colours[0][1], self.text_colours[0][2])
        outline = (self.outline_colours[0][0], self.outline_colours[0][1], self.outline_colours[0][2])
        # Creates the base for both images (Rectangles)
        self.lower_drawer.rectangle((0, 0, resoltuion[0], resoltuion[1]-1), outline=outline, width=2, fill=colour)
        # Create a rectangle to remove the unwanted areas of colour, and adds an elipses to give a round effect.
        # 2 on both sides for 2 images.
        # Right side
        self.lower_drawer.rectangle((resoltuion[0] - int(5.5*multi), 0, resoltuion[0], resoltuion[1]), fill=(0, 0, 0, 0))
        self.lower_drawer.ellipse((resoltuion[0] - int(10*multi), 0, resoltuion[0], resoltuion[1]), outline=outline, width=2, fill=colour)
        # Left side
        self.lower_drawer.rectangle((0, 0, int(5.5 * multi), int(10 * multi)), fill=(0, 0, 0, 0))
        self.lower_drawer.ellipse((0, 0, int(10 * multi), int(10 * multi)), outline=outline, width=2, fill=(colour))
        self.lower_drawer.rectangle((int(5.5 * multi), 2, resoltuion[0] - int(5.5*multi), resoltuion[1]-3), fill=colour)
        for x in range(len(coords)):
            self.lower_drawer.text(coords[x], Lines[x], fill=textcolour, font=font, align='center')
        delta_x = (self.resoltuion[0] - resoltuion[0])//2
        delta_y = (self.resoltuion[1] - resoltuion[1])//2
        # Perfects the size for pasting.
        self.lower_button = self.lower_button.resize(size=(self.resoltuion[0] - delta_x*2, self.resoltuion[1] - delta_y*2))
        # Pasting Image ontop of transparent image with original resolution.
        self.Button = Image.new('RGBA', (self.resoltuion))
        self.Button.paste(self.lower_button, (delta_x, delta_y, self.resoltuion[0] - delta_x, self.resoltuion[1] - delta_y), self.lower_button)
        self.lower_button = ImageTk.PhotoImage(self.Button)

    def draw_multiple_line_text(self, text, font, text_start_width, text_start_height, Line_Width):
        ## Used for creating multi-line text. Splits the text across multiple lines if the text crosses the line width.
        ## Returns (coords, lines, line_height): one [x, y] pair per wrapped
        ## line, the wrapped lines, and the height of the last measured line
        ## (0 when the text is empty).
        y_text = text_start_height
        x_text = text_start_width
        Coords = []
        Lines = []
        line_height = 0
        for line in textwrap.wrap(text, width=int(Line_Width)):
            if hasattr(font, "getsize"):
                line_width, line_height = font.getsize(line)
            else:
                # Pillow 10 removed getsize(); the right/bottom edges of the
                # bounding box are the values getsize() used to return.
                _, _, line_width, line_height = font.getbbox(line)
            Coords.append([(x_text - line_width) / 2, y_text])
            y_text += line_height
            Lines.append(line)
        return Coords, Lines, line_height

    ## Animation Effect.
    ## Hovering.
    def on_enter(self, *args):
        # switches images to the transformed button.
        self.Q_Dump()
        self.queue.put('E')

    def Q_Dump(self):
        # Discards pending animation requests so only the newest one runs.
        for i in range(self.queue.qsize()):
            try:
                self.queue.get_nowait()
            except Exception:
                # The animator thread emptied the queue first.
                break

    def on_leave(self, *args):
        # switches back to static image.
        self.Q_Dump()
        self.queue.put('L')

    def Manage_Animation(self):
        # Animator thread body: runs one fade per queued request.
        while True:
            Factor = self.queue.get()
            if Factor == 'E':
                self.change_sc()
            elif Factor == "L":
                self.change_tsc()

    def change_sc(self, si:int=9):
        # Steps the frames downwards from index si; hands over to
        # change_tsc() when the opposite fade has been requested meanwhile.
        self.change_to_static = True
        for i in range(si, 0, -1):
            if self.change_to_trans:
                self.change_to_static = False
                self.change_tsc(i)
                break
            sleep(0.01)
            self.configure(image=self.Images[i])
        if self.change_to_static:
            self.change_to_static = False

    def change_tsc(self, si:int=0):
        # Steps the frames upwards from index si; hands over to
        # change_sc() when the opposite fade has been requested meanwhile.
        self.change_to_trans = True
        for i in range(si, 10):
            if self.change_to_static:
                self.change_to_trans = False
                self.change_sc(i)
                break
            sleep(0.01)
            self.configure(image=self.Images[i])
        if self.change_to_trans:
            self.change_to_trans = False

    def connect_function(self, function=lambda:None):
        # Binds the button to a function.
        def connector(*args):
            self.configure(image=self.lower_button)
            function()

        def disconnector(*args):
            self.configure(image=self.Images[0])

        self.bind("<ButtonPress-1>", connector)
        self.bind("<ButtonRelease-1>", disconnector)
class EventManager(MetricsReporter):
    """Pull events from an iterable, queue them and fan them out to handlers.

    One thread pulls raw events and enqueues the handled ones; a second
    thread dequeues them and passes each to every registered handler.
    Rebalance, reconcile and oversubscribe events are scheduled at the
    frequencies read from the config manager.
    """

    def __init__(self, event_iterable, event_handlers, event_timeout=DEFAULT_EVENT_TIMEOUT_SECS):
        self.__reg = None
        self.__tags = None
        self.__stopped = False
        self.__q = Queue()
        self.__events = event_iterable
        self.__event_handlers = event_handlers
        self.__event_timeout = event_timeout
        self.__processed_count = 0
        self.__started = False
        self.__started_lock = Lock()
        self.__processing_thread = Thread(target=self.__process_events)
        self.__pulling_thread = Thread(target=self.__pull_events)
        self.last_successful_event_epoch_s = 0

        config_manager = get_config_manager()

        # A non-positive frequency leaves the periodic event unscheduled.
        rebalance_frequency = config_manager.get_float(REBALANCE_FREQUENCY_KEY, DEFAULT_REBALANCE_FREQUENCY)
        if rebalance_frequency > 0:
            schedule.every(rebalance_frequency).seconds.do(self.__rebalance)

        reconcile_frequency = config_manager.get_float(RECONCILE_FREQUENCY_KEY, DEFAULT_RECONCILE_FREQUENCY)
        if reconcile_frequency > 0:
            schedule.every(reconcile_frequency).seconds.do(self.__reconcile)

        oversubscribe_frequency = config_manager.get_float(OVERSUBSCRIBE_FREQUENCY_KEY, DEFAULT_OVERSUBSCRIBE_FREQUENCY)
        if oversubscribe_frequency > 0:
            schedule.every(oversubscribe_frequency).seconds.do(self.__oversubscribe)

    def join(self):
        """Block until both the pulling and the processing thread finish."""
        self.__pulling_thread.join()
        self.__processing_thread.join()

    def stop_processing_events(self):
        """Signal the processing loop to stop, close the source and join."""
        self.__stopped = True
        self.__events.close()
        self.join()

    def start_processing_events(self):
        """Start both threads; repeated calls are no-ops."""
        with self.__started_lock:
            if self.__started:
                return
            self.__processing_thread.start()
            self.__pulling_thread.start()
            self.__started = True

    def get_queue_depth(self):
        """Return the number of events currently waiting in the queue."""
        return self.__q.qsize()

    def get_processed_count(self):
        """Return how many events have been passed through the handlers."""
        return self.__processed_count

    def __rebalance(self):
        self.__put_event(REBALANCE_EVENT)

    def __reconcile(self):
        self.__put_event(RECONCILE_EVENT)

    def __oversubscribe(self):
        self.__put_event(OVERSUBSCRIBE_EVENT)

    def __pull_events(self):
        # Body of the pulling thread: runs until the iterable is exhausted.
        for event in self.__events:
            self.__put_event(event)

    def __put_event(self, event):
        # Events arrive as UTF-8 encoded JSON; only handled actions are queued.
        event = json.loads(event.decode("utf-8"))
        if event[ACTION] in HANDLED_ACTIONS:
            log.info("Enqueuing event: {}, queue depth: {}".format(event[ACTION], self.get_queue_depth()))
            event[ENQUEUE_TIME_KEY] = time.time()
            self.__q.put(event)
            if self.__reg is not None:
                self.__reg.counter(ENQUEUED_COUNT_KEY, self.__tags).increment()
                self.__reg.counter(self.__get_enqueued_metric_name(event), self.__tags).increment()

    def __process_events(self):
        # Body of the processing thread: runs until stop_processing_events().
        while not self.__stopped:
            try:
                event = self.__q.get(timeout=self.__event_timeout)
                dequeue_time = time.time()
                log.info("Dequeued event: {}, queue depth: {}".format(event[ACTION], self.get_queue_depth()))
                if self.__reg is not None:
                    self.__reg.counter(DEQUEUED_COUNT_KEY, self.__tags).increment()
                    self.__reg.counter(self.__get_dequeued_metric_name(event), self.__tags).increment()
                    self.__reg.distribution_summary(QUEUE_LATENCY_KEY, self.__tags).record(dequeue_time - event[ENQUEUE_TIME_KEY])
            except Empty:
                log.debug("Timed out waiting for event on queue.")
                continue

            for event_handler in self.__event_handlers:
                try:
                    log.info("{} handling event: {}".format(type(event_handler).__name__, event[ACTION]))
                    event_handler.handle(event)
                    self.__report_succeeded_event(event_handler)
                except Exception:
                    # One failing handler must not stop the others or the loop.
                    log.exception("Event handler: '{}' failed to handle event: '{}'".format(
                        type(event_handler).__name__, event))
                    self.__report_failed_event(event_handler)

            self.__q.task_done()
            # The registry is optional until set_registry() is called; an
            # unguarded access here used to kill the processing thread.
            if self.__reg is not None:
                self.__reg.counter(EVENT_PROCESSED_KEY, self.__tags).increment()
                self.__reg.gauge(QUEUE_DEPTH_KEY, self.__tags).set(self.get_queue_depth())
            self.__processed_count += 1

    def __report_succeeded_event(self, event_handler: EventHandler):
        if self.__reg is not None:
            self.__reg.counter(self.__get_event_succeeded_metric_name(event_handler), self.__tags).increment()
            self.__reg.counter(EVENT_SUCCEEDED_KEY, self.__tags).increment()
            self.last_successful_event_epoch_s = datetime.utcnow().timestamp()

    def __report_failed_event(self, event_handler: EventHandler):
        if self.__reg is not None:
            self.__reg.counter(self.__get_event_failed_metric_name(event_handler), self.__tags).increment()
            self.__reg.counter(EVENT_FAILED_KEY, self.__tags).increment()

    @staticmethod
    def __get_event_succeeded_metric_name(event_handler: EventHandler) -> str:
        return "titus-isolate.{}.eventSucceeded".format(type(event_handler).__name__)

    @staticmethod
    def __get_event_failed_metric_name(event_handler: EventHandler) -> str:
        return "titus-isolate.{}.eventFailed".format(type(event_handler).__name__)

    @staticmethod
    def __get_enqueued_metric_name(event) -> str:
        return "titus-isolate.{}.eventEnqueued".format(event[ACTION])

    @staticmethod
    def __get_dequeued_metric_name(event) -> str:
        return "titus-isolate.{}.eventDequeued".format(event[ACTION])

    def set_registry(self, registry, tags):
        """Attach the metrics registry and tags used for all counters."""
        self.__reg = registry
        self.__tags = tags

    def report_metrics(self, tags):
        """No-op: metrics are emitted inline as events flow through."""
        pass
class QA_Thread(threading.Thread):
    '''
    A producer/consumer model running on a freely created thread.

    The thread owns a queue whose items are QA_Task objects. The callback
    matters: a function can be bound to a task when it is specified.
    QA_Engine inherits from this class.

    A Queue is built in, with four queue-related methods:
    self.put / self.put_nowait / self.get / self.get_nowait

    If you override run(), you have to deal with the queue yourself, or
    simply implement your own logic.
    '''

    def __init__(self, queue=None, name=None, daemon=False):
        threading.Thread.__init__(self)
        self.queue = Queue() if queue is None else queue
        self.thread_stop = False
        self.__flag = threading.Event()  # flag used to pause the thread
        self.__flag.set()  # set to True (not paused)
        self.__running = threading.Event()  # flag used to stop the thread
        self.__running.set()  # set running to True
        self.name = QA_util_random_with_topic(topic='QA_Thread', lens=3) if name is None else name
        self.idle = False
        self.daemon = daemon

    def __repr__(self):
        return '<QA_Thread: {} id={} ident {}>'.format(
            self.name, id(self), self.ident)

    def run(self):
        """Consume QA_Task items until stop() is called.

        The pause flag is checked before every item, so pause() takes
        effect between tasks and stop() wakes a paused thread.
        """
        while self.__running.is_set() and not self.thread_stop:
            # Blocks here while paused.
            self.__flag.wait()
            if self.thread_stop:
                break
            # The queue get is blocking, to avoid losing messages.
            # NOTE(review): with an empty queue this loop spins without
            # sleeping (the original sleep was commented out) - confirm
            # whether the CPU cost is acceptable.
            try:
                if self.queue.empty() is False:
                    _task = self.queue.get()  # receive a message
                    self.idle = False
                    assert isinstance(_task, QA_Task)
                    if _task.worker is not None:
                        _task.do()
                        self.queue.task_done()  # one task finished
                else:
                    self.idle = True
            except ValueError:
                # Only ValueError is tolerated; anything else propagates.
                pass

    def pause(self):
        """Pause task consumption until resume() is called."""
        self.__flag.clear()

    def resume(self):
        """Let a paused thread continue."""
        self.__flag.set()  # set to True so the thread stops blocking

    def stop(self):
        """Ask the thread to terminate, waking it first if it is paused."""
        self.__running.clear()
        self.thread_stop = True
        # Without this a paused thread would wait on the flag forever.
        self.__flag.set()

    def __start(self):
        # NOTE(review): queue.Queue has no start(); this only works with a
        # custom queue object that provides one.
        self.queue.start()

    def put(self, task):
        """Put *task* on the queue, blocking if the queue is full."""
        self.queue.put(task)

    def put_nowait(self, task):
        """Put *task* on the queue without blocking."""
        self.queue.put_nowait(task)

    def get(self):
        """Remove and return an item, blocking until one is available."""
        return self.queue.get()

    def get_nowait(self):
        """Remove and return an item without blocking."""
        return self.queue.get_nowait()

    def qsize(self):
        """Return the approximate number of queued items."""
        return self.queue.qsize()
# Counts, for every depth of a tree rooted at node 1, how many nodes on that
# depth have no entry in the child table, and prints the counts separated by
# single spaces (no trailing newline).
from queue import Queue

# First line: two integers; the second one is the number of table lines.
n, m = map(int, input().split())

# Each table line: "<parent> <child count> <child> <child> ...".
tree = {}
for _ in range(m):
    parent, _declared, *children = map(int, input().split())
    tree[parent] = children

pending = Queue()
pending.put(1)
level = []
# Breadth-first walk, one depth per outer iteration.
while not pending.empty():
    leaves = 0
    for _ in range(pending.qsize()):
        node = pending.get()
        kids = tree.get(node)
        if kids is None:
            # No table entry at all: this node is a leaf.
            leaves += 1
        else:
            for kid in kids:
                pending.put(kid)
    level.append(leaves)

print(" ".join("%d" % count for count in level), end='')
class BaseServer(TcBase):
    """UDP server that tracks clients through sync and heartbeat messages.

    The server runs in its own thread (see ``start``).  Each loop iteration
    drops clients whose last heartbeat is older than 20 seconds, flushes the
    outgoing message queue and handles any datagram that is ready to read.

    Events emitted: ``newClientEvent(client)``,
    ``clientRemovedEvent(client)`` and ``clientsClearedEvent()``.
    """

    def __init__(self, interface="0.0.0.0", port=9998):
        super().__init__()
        self.port = port
        self.interface = interface
        self.sock = None
        self.clients = {}  # client address -> time of last add/heartbeat
        self.send_q = Queue()  # pending (message, address) tuples
        self.running = False
        self.server_thread = None
        self.ready_flag = True
        self.newClientEvent = TcEvent()
        self.clientRemovedEvent = TcEvent()
        self.clientsClearedEvent = TcEvent()

    def start(self, interface="0.0.0.0", port=9998):
        """Start the server loop in a new thread on ``interface:port``."""
        self.interface = interface
        self.port = port
        self.server_thread = Thread(target=self.server)
        self.running = True
        self.server_thread.start()

    def stop(self):
        """Ask the server loop to exit after its current iteration."""
        self.running = False
        self.logger.info("Server stopped")

    def server(self):
        """Thread body: open the socket, serve until stopped, clean up.

        If the socket cannot be created or bound the error is logged, the
        server is stopped and the loop is never entered.  Cleanup (clearing
        the client list and closing the socket) always runs, including when
        an unexpected exception escapes the loop.
        """
        try:
            try:
                self.sock = socket.socket(
                    socket.AF_INET,  # Internet
                    socket.SOCK_DGRAM)  # UDP
                self.sock.setblocking(False)
                self.sock.setsockopt(
                    socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
                self.sock.bind((self.interface, self.port))
            except socket.error as e:
                self.logger.error(f"Error creating socket: {e}")
                self.stop()
                return

            self.logger.info(
                f"UDP Server started. Listening on port: {self.port}")

            # TODO: Make more efficient/elegant
            while self.running:
                self.remove_old_clients()

                # Send messages from send queue
                self.send_msgs()

                # Zero timeout: poll for readable data without blocking.
                readable, _, _ = select.select([self.sock], [], [], 0)
                for sock in readable:
                    try:
                        d = sock.recvfrom(128)
                        self.handle_msg(d)
                    except socket.error as e:
                        self.logger.error(e)

                time.sleep(0.05)
        finally:
            self.remove_clients()
            # The socket may never have been created if setup failed.
            if self.sock is not None:
                self.sock.close()
                self.logger.debug("Socket closed")

    def remove_old_clients(self):
        """Remove clients that have not sent a heartbeat for 20 seconds."""
        now = time.time()
        # Collect first: remove_client mutates self.clients.
        stale = {c for c, hb_ts in self.clients.items() if now - hb_ts > 20}
        for c in stale:
            self.remove_client(c)

    def add_client(self, client):
        """Register ``client`` with the current time and emit the event."""
        self.logger.info(f"New client connection from: {client}")
        self.clients[client] = time.time()
        self.newClientEvent.emit(client)

    def remove_client(self, client):
        """Forget ``client`` and emit ``clientRemovedEvent``.

        Unknown clients are ignored instead of raising ``KeyError``.
        """
        if client not in self.clients:
            return
        self.logger.info(f"Removing client connection: {client}")
        self.clients.pop(client)
        if not self.clients:
            self.logger.info(
                "No clients connected. Waiting for new client connections...")
        self.clientRemovedEvent.emit(client)

    def remove_clients(self):
        """Drop every client and emit ``clientsClearedEvent``."""
        self.clients = {}
        self.logger.debug("Client list cleared")
        self.clientsClearedEvent.emit()

    def handle_heartbeat(self, client):
        """Refresh the heartbeat time of ``client`` (adding it if new)."""
        now = time.time()
        if client not in self.clients:
            self.add_client(client)
        self.clients[client] = now
        # self.logger.debug(f"Heartbeat from: {client}")
        # self.heartbeat_reply(client)

    def heartbeat_reply(self, client):
        """Queue a heartbeat reply for ``client``."""
        self.add_to_send_queue("HB REPLY", client)

    def add_to_send_queue(self, msg, client):
        """Queue ``msg`` for delivery to ``client`` (a (host, port) pair)."""
        msg_pkg = (msg, (client[0], client[1]))
        self.send_q.put(msg_pkg)

    def send_msgs(self):
        """Send everything currently waiting in the send queue."""
        while self.send_q.qsize() > 0:
            msg, client = self.send_q.get()
            self.send_msg(msg, client)

    def send_msg(self, msg: str, client):
        """Send ``msg`` UTF-8 encoded to ``client``; socket errors are logged."""
        try:
            self.sock.sendto(msg.encode('utf-8'), client)
            self.logger.debug(f"MSG SENT: {msg} {client}")
        except socket.error as e:
            self.logger.error(
                f"There was a socket error while attempting to send a message: {e}"
            )

    def handle_msg(self, d):
        """Dispatch one received datagram.

        Args:
            d: ``(payload_bytes, client_address)`` as returned by
                ``recvfrom``.

        Any message from an unknown client registers that client.  A payload
        that cannot be decoded is logged and dropped instead of crashing the
        server thread.
        """
        client = d[1]
        try:
            msg = d[0].decode()
        except UnicodeDecodeError as e:
            self.logger.error(f"Undecodable message from {client}: {e}")
            return
        self.logger.debug(f"MSG RECV: {client} {msg}")

        if msg == "/sync/add" or client not in self.clients:
            self.add_client(client)
        elif msg == "/sync/remove":
            self.remove_client(client)
        elif msg == "/heartbeat":
            self.handle_heartbeat(client)
        else:
            self.logger.error(f"UNHANDLED MESSAGE: {client} {msg}")
class InteractiveBrokersExchange(Exchange):
    '''Interactive Brokers Exchange'''

    def __init__(self, trading_type, verbose, account='', delayed=True, **kwargs):
        self._trading_type = trading_type
        self._verbose = verbose

        if self._trading_type == TradingType.LIVE:
            super().__init__(ExchangeType('interactivebrokers'))
        else:
            super().__init__(ExchangeType('interactivebrokerspaper'))

        # map order.id to order
        self._orders = {}

        # IB TWS gateway: the API object is handed these queues at
        # construction; the async methods below drain them.
        self._order_event_queue = Queue()
        self._market_data_queue = Queue()
        self._contract_lookup_queue = Queue()
        self._account_position_queue = Queue()
        self._api = _API(account, self.exchange(), delayed,
                         self._order_event_queue,
                         self._market_data_queue,
                         self._contract_lookup_queue,
                         self._account_position_queue)

    # *************** #
    # General methods #
    # *************** #
    async def instruments(self):
        '''get list of available instruments'''
        return []

    async def connect(self):
        '''connect to exchange. should be asynchronous.

        For OrderEntry-only, can just return None
        '''
        if self._trading_type == TradingType.LIVE:
            print('*' * 100)
            print('*' * 100)
            print('WARNING: LIVE TRADING')
            print('*' * 100)
            print('*' * 100)
            port = 7496
        else:
            port = 7497

        self._api.connect('127.0.0.1', port, randint(0, 10000))
        # Run the API message loop in a daemon thread.
        self._api_thread = threading.Thread(target=self._api.run, daemon=True)
        self._api_thread.start()

        # The API is considered connected once nextOrderId is set.
        while self._api.nextOrderId is None:
            print('waiting for IB connect...')
            await asyncio.sleep(1)

        print('IB connected!')

    async def lookup(self, instrument):
        '''look up instruments matching `instrument`.

        Polls the contract lookup queue once per second, at most 5 times.
        Returns a list of instruments, or None if nothing arrived in time.
        '''
        self._api.reqContractDetails(_constructContract(instrument))
        for _ in range(5):
            if self._contract_lookup_queue.qsize() > 0:
                ret = []
                while self._contract_lookup_queue.qsize() > 0:
                    contract_details = self._contract_lookup_queue.get()
                    ret.append(_constructInstrument(contract_details.contract))
                return ret
            await asyncio.sleep(1)
        return None

    # ******************* #
    # Market Data Methods #
    # ******************* #
    async def subscribe(self, instrument):
        self._api.subscribeMarketData(instrument)

    async def tick(self):
        '''return data from exchange'''
        while True:
            # clear order events
            while self._order_event_queue.qsize() > 0:
                order_data = self._order_event_queue.get()
                status = order_data['status']
                order = self._orders.get(order_data['orderId'])
                if order is None:
                    # Event for an order this instance never submitted:
                    # skip it rather than let a KeyError end the stream.
                    continue

                if status in ('ApiPending', 'PendingSubmit', 'PendingCancel',
                              'PreSubmitted', 'ApiCancelled', 'Inactive'):
                    # ignore
                    continue

                elif status in ('Submitted',):
                    # TODO more granular order events api?
                    # ignore
                    pass

                elif status in ('Cancelled',):
                    e = Event(type=EventType.CANCELED, target=order)
                    yield e

                elif status in ('Filled',):
                    # set filled
                    order.filled = order_data['filled']

                    # create trade object
                    t = Trade(volume=order_data['filled'],
                              price=order_data['avgFillPrice'],
                              maker_orders=[],
                              taker_order=order)

                    # set my order
                    t.my_order = order

                    e = Event(type=EventType.TRADE, target=t)
                    yield e

            # clear market data events: each price update is reported as a
            # one-lot BUY trade.
            while self._market_data_queue.qsize() > 0:
                market_data = self._market_data_queue.get()
                instrument = market_data['instrument']
                price = market_data['price']
                o = Order(volume=1, price=price, side=Side.BUY,
                          instrument=instrument, exchange=self.exchange())
                t = Trade(volume=1, price=price, taker_order=o, maker_orders=[])
                yield Event(type=EventType.TRADE, target=t)

            # Yield control to the event loop between polls.
            await asyncio.sleep(0)

            # clear market data events
            # TODO

    # ******************* #
    # Order Entry Methods #
    # ******************* #
    async def accounts(self):
        '''get accounts from source

        Polls the account position queue once per second, at most 5 times;
        returns None if nothing arrived in time.
        '''
        self._api.reqPositions()
        for _ in range(5):
            if self._account_position_queue.qsize() > 0:
                return self._account_position_queue.get()
            await asyncio.sleep(1)
        return None

    async def newOrder(self, order):
        '''submit a new order to the exchange. should set the given order's `id` field to exchange-assigned id

        For MarketData-only, can just return None
        '''
        # construct IB contract and order
        ibcontract, iborder = _constructContractAndOrder(order)

        # send to IB
        order_id = self._api.placeOrder(ibcontract, iborder)

        # update order id and remember the order for tick()
        order.id = order_id
        self._orders[order.id] = order

    async def cancelOrder(self, order: Order):
        '''cancel a previously submitted order to the exchange.

        For MarketData-only, can just return None
        '''
        self._api.cancelOrder(order.id)
def train(self, texts, chunksize=100, workers=2):
    """Train the model on a stream of examples using worker threads.

    Args:
        texts: Iterable (may be a one-shot generator) of
            ``(sentence, object_index, softmax_target, sigmoid_target)``
            tuples; each word of ``sentence`` is mapped through
            ``self.vocab.get_index``.
        chunksize: Number of examples grouped into one job.
        workers: Number of training threads.

    Returns:
        ``(objects_done, total_error)``: the summed lengths of all processed
        jobs and the accumulated training error.
    """
    logger.info("training model with %i workers" % (workers))

    start = time.time()
    # One-element lists serve as mutable cells shared with the worker closure.
    next_report = [1.0]
    total_error = [0.0]
    objects_done = [0]

    # Bounded queue: buffer ahead only a limited number of jobs (the reason
    # a plain ThreadPool is not used here).
    jobs = Queue(maxsize=2 * workers)
    # Guards the shared counters and the progress log.
    lock = threading.Lock()

    def worker_train():
        """Train the model, lifting lists of sentences from the jobs queue."""
        input_width = self.window * self.size + self.object_size
        observation_work = np.zeros(input_width, dtype=REAL)
        prediction_work = np.zeros(self.output_size, dtype=REAL)
        if self.bilinear_form:
            composition_work = np.zeros(
                [max(self.output_size, input_width), input_width],
                dtype=REAL)
        else:
            composition_work = None

        while True:
            job = jobs.get()
            if job is None:
                # Sentinel: data finished, exit.
                return

            error = 0
            for sentence, object_index, softmax_target, sigmoid_target in job:
                error += train_sentence_concatenation(
                    self, sentence, object_index, softmax_target,
                    sigmoid_target, self._alpha, prediction_work,
                    observation_work, composition_work)

            with lock:
                total_error[0] += error
                objects_done[0] += len(job)
                elapsed = time.time() - start
                if elapsed >= next_report[0]:
                    logger.info("PROGRESS: %s objects, %.0f objects/s" %
                                (objects_done[0],
                                 float(objects_done[0]) / elapsed if elapsed else 0.0))
                    # Don't flood the log: wait at least a second between
                    # progress reports.
                    next_report[0] = elapsed + 1.0

    pool = []
    for _ in range(workers):
        pool.append(threading.Thread(target=worker_train))
    for member in pool:
        member.daemon = True  # make interrupting the process with ctrl+c easier
        member.start()

    def indexed_examples():
        # Lazily convert every word to its vocabulary index.
        for sentence, object_index, softmax_target, sigmoid_target in texts:
            indices = np.array(
                [self.vocab.get_index(word) for word in sentence], dtype=INT)
            yield (indices, object_index, softmax_target, sigmoid_target)

    # Start filling the jobs queue; put() blocks while the queue is full.
    for job_no, job in enumerate(
            gensim_utils.grouper(indexed_examples(), chunksize)):
        logger.debug("putting job #%i in the queue, qsize=%i" %
                     (job_no, jobs.qsize()))
        jobs.put(job)

    logger.info(
        "reached the end of input; waiting to finish %i outstanding jobs" %
        jobs.qsize())

    # One sentinel per worker: no more work is coming.
    for _ in range(workers):
        jobs.put(None)

    for member in pool:
        member.join()

    elapsed = time.time() - start
    logger.info("training on %i objects took %.1fs, %.0f words/s" %
                (objects_done[0], elapsed,
                 objects_done[0] / elapsed if elapsed else 0.0))

    return (objects_done[0], total_error[0])
class QiuBai:
    """Three-stage threaded scraper for qiushibaike.com hot pages.

    URLs flow through ``urlQueue`` -> ``htmlQueue`` -> ``contentQueue``;
    the extracted items are appended to ``./res/qb.txt`` as JSON.
    """

    def __init__(self):
        self.headers = {
            "User-Agent":
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36"
        }
        self.urlQueue = Queue()
        self.htmlQueue = Queue()
        self.contentQueue = Queue()

    @staticmethod
    def _first(values):
        """Return the first element of ``values``, or None when it is empty."""
        return values[0] if len(values) > 0 else None

    def getTotalUrl(self):
        """Put the URLs of pages 1 to 35 on ``urlQueue``."""
        urlTemp = "https://www.qiushibaike.com/hot/page/{}/"
        for i in range(1, 36):
            self.urlQueue.put(urlTemp.format(i))
        print("getTotalUrl is over", self.urlQueue.qsize())

    def parseUrl(self):
        """Worker loop: fetch each URL and queue the parsed HTML element.

        Runs forever (blocking on ``urlQueue.get()``); meant for a daemon
        thread.  ``task_done()`` is always called, so a failed request
        cannot make ``urlQueue.join()`` hang.
        """
        while True:
            url = self.urlQueue.get()
            try:
                print("parsing Url: ", url)
                response = requests.get(url, headers=self.headers, timeout=10)
                # Decode the page, then parse it into an element tree.
                html = etree.HTML(response.content.decode())
                self.htmlQueue.put(html)
                print("pu HQ: ", self.htmlQueue.qsize())
                print("pu UQ: ", self.urlQueue.qsize())
            except Exception as e:
                # Thread boundary: report the failure and keep the worker
                # alive for the remaining URLs.
                print("parseUrl failed: ", url, e)
            finally:
                # Mark the URL done only after its result has been handled.
                self.urlQueue.task_done()

    def _parseItem(self, i):
        """Extract one post (a ``qiushi_tag_*`` element) into a dict."""
        first = self._first

        author_img = first(
            i.xpath('./div[@class="author clearfix"]/a[1]/img/@src'))
        if author_img is not None:
            author_img = "https:" + author_img

        author_name = first(
            i.xpath('./div[@class="author clearfix"]/a[2]/h2/text()'))

        author_href = first(
            i.xpath('./div[@class="author clearfix"]/a[1]/@href'))
        if author_href is not None:
            author_href = "https://www.qiushibaike.com" + author_href

        author_gender = first(
            i.xpath('./div[@class="author clearfix"]//div/@class'))
        if author_gender is not None:
            # Last class token with the "Icon" part removed.
            author_gender = author_gender.split(" ")[-1].replace("Icon", "")

        author_age = first(
            i.xpath('./div[@class="author clearfix"]//div/text()'))

        # Kept as the full list of text fragments.
        content = i.xpath('./a[@class="contentHerf"]/div/span/text()')

        content_vote = first(
            i.xpath('./div[@class="stats"]/span[1]/i/text()'))
        content_comment_numbers = first(
            i.xpath('./div[@class="stats"]/span[2]/a/i/text()'))

        hot_comment_author = first(
            i.xpath('./a[@class="indexGodCmt"]/div/span[last()]/text()'))

        hot_comment = first(
            i.xpath('./a[@class="indexGodCmt"]/div/div/text()'))
        if hot_comment is not None:
            hot_comment = hot_comment.replace("\n:", "").replace("\n", "")

        hot_comment_like_num = i.xpath(
            './a[@class="indexGodCmt"]/div/div/div/text()')
        # Here the LAST text node is the one that is kept.
        hot_comment_like_num = hot_comment_like_num[-1].replace(
            "\n", "") if len(hot_comment_like_num) > 0 else None

        return dict(author_name=author_name,
                    author_img=author_img,
                    author_href=author_href,
                    author_gender=author_gender,
                    author_age=author_age,
                    content=content,
                    content_vote=content_vote,
                    content_comment_numbers=content_comment_numbers,
                    hot_comment=hot_comment,
                    hot_comment_author=hot_comment_author,
                    hot_comment_like_num=hot_comment_like_num)

    def getContent(self):
        """Worker loop: turn each parsed page into a list of item dicts.

        Runs forever; meant for a daemon thread.  ``task_done()`` is always
        called so ``htmlQueue.join()`` cannot hang on a bad page.
        """
        print("getContent start!", self.htmlQueue.qsize())
        while True:
            html = self.htmlQueue.get()
            try:
                totalDiv = html.xpath("//*[starts-with(@id, 'qiushi_tag_')]")
                items = [self._parseItem(i) for i in totalDiv]
                self.contentQueue.put(items)
                print("gt contentQ: ", self.contentQueue.qsize())
                print("gt htmlQ: ", self.htmlQueue.qsize())
            except Exception as e:
                print("getContent failed: ", e)
            finally:
                self.htmlQueue.task_done()

    def saveItems(self):
        """Worker loop: append every item of each batch to ./res/qb.txt.

        Runs forever; meant for a daemon thread.
        """
        print("saveItems start cq: ", self.contentQueue.qsize())
        while True:
            items = self.contentQueue.get()
            try:
                with open('./res/qb.txt', "a", encoding='utf-8') as f:
                    for i in items:
                        json.dump(i, f, ensure_ascii=False, indent=2)
            except OSError as e:
                print("saveItems failed: ", e)
            finally:
                self.contentQueue.task_done()

    def run(self):
        """Run the whole pipeline and return when all queues are drained."""
        # Fill the URL queue before any join(): if the producer ran in its
        # own thread, urlQueue.join() could return before anything had been
        # queued.
        self.getTotalUrl()

        threadList = []
        # Network requests.
        for _ in range(10):
            threadList.append(threading.Thread(target=self.parseUrl))
        # Data extraction.
        threadList.append(threading.Thread(target=self.getContent))
        # Persistence.
        threadList.append(threading.Thread(target=self.saveItems))

        for t in threadList:
            # Daemon threads end with the main thread; the worker loops
            # never return on their own.
            t.daemon = True
            t.start()

        # Block the main thread until every queue has been fully processed.
        self.urlQueue.join()
        self.htmlQueue.join()
        self.contentQueue.join()
class RestClient(object):
    """
    HTTP client designed for talking to RESTful APIs.

    * Override ``sign`` to modify (e.g. sign) every request before it is sent.
    * Override ``onFailed`` to handle responses whose status is not 2xx.
      To handle them per request, pass ``onFailed`` to ``addRequest``.
    * Override ``onError`` to handle Python exceptions (for example network
      failures); pass ``onError`` to ``addRequest`` for per-request handling.
    """

    #----------------------------------------------------------------------
    def __init__(self):
        """"""
        self.urlBase = None  # type: str
        self._active = False

        self._queue = Queue()
        self._pool = None  # type: Pool

        # Rolling windows (last 100 requests) used by getStatus().
        self._queueing_times = deque(maxlen=100)
        self._response_times = deque(maxlen=100)

    #----------------------------------------------------------------------
    def init(self, urlBase):
        """
        Initialise the client.
        :param urlBase: URL prefix, e.g. 'https://www.bitmex.com/api/v1/'
        """
        self.urlBase = urlBase

    #----------------------------------------------------------------------
    def _createSession(self):
        """Create the session used by the worker loop."""
        return requests.session()

    #----------------------------------------------------------------------
    def start(self, n=3):
        """Start processing queued requests (no-op if already started)."""
        if self._active:
            return

        self._active = True
        self._pool = Pool(n)
        # NOTE(review): the pool has n workers but only one _run loop is
        # submitted -- confirm whether n loops were intended.
        self._pool.apply_async(self._run)

    #----------------------------------------------------------------------
    def stop(self):
        """
        Stop processing. Requests that have not been sent yet stay in the
        queue.
        :return:
        """
        self._active = False

    #----------------------------------------------------------------------
    def join(self):
        """
        Wait until every queued request has been processed.
        To make sure the RestClient has exited, call join right after stop.
        To only make sure all requests are processed, just call join.
        :return:
        """
        self._queue.join()

    #----------------------------------------------------------------------
    def addRequest(
            self,
            method,  # type: str
            path,  # type: str
            callback,  # type: Callable[[dict, Request], Any]
            params=None,  # type: dict
            data=None,  # type: dict
            headers=None,  # type: dict
            onFailed=None,  # type: Callable[[int, Request], Any]
            onError=None,  # type: Callable[[type, Exception, traceback, Request], Any]
            extra=None  # type: Any
    ):  # type: (...)->Request
        """
        Queue a request.
        :param method: GET, POST, PUT, DELETE, QUERY
        :param path: path appended to urlBase
        :param callback: called as callback(jsonBody, request) when the
            response status is 2xx
        :param params: dict for query string
        :param data: dict for body
        :param headers: dict for headers
        :param onFailed: called as onFailed(jsonBody, request) when the
            status is not 2xx; when given, the default onFailed is not called
        :param onError: called as onError(etype, evalue, tb, request) when a
            Python exception occurs; when given, the default onError is not
            called
        :param extra: stored in the returned request's ``extra`` field (it
            can also be set after this call)
        :return: Request
        """
        request = Request(method, path, params, data, headers, callback)
        request.extra = extra
        request.onFailed = onFailed
        request.onError = onError
        request.createDatetime = datetime.now()
        request.deliverDatetime = None
        request.responseDatetime = None
        self._queue.put(request)
        return request

    #----------------------------------------------------------------------
    def _run(self):
        """Worker loop: send queued requests until the client is stopped."""
        session = self._createSession()
        while self._active:
            try:
                # Timed get so that stop() is noticed within a second.
                request = self._queue.get(timeout=1)
                try:
                    self._processRequest(request, session)
                finally:
                    self._queue.task_done()
            except Empty:
                pass
            except Exception:
                et, ev, tb = sys.exc_info()
                self.onError(et, ev, tb, None)

    #----------------------------------------------------------------------
    def sign(self, request):  # type: (Request)->Request
        """
        Every request passes through this function before it is sent.
        Signing and similar preparation can be implemented here; modify the
        request as needed.
        @:return (request)
        """
        return request

    #----------------------------------------------------------------------
    def onFailed(self, httpStatusCode, request):  # type:(int, Request)->None
        """
        Default handler for failed requests (HttpStatusCode != 2xx).
        Writes the request to stderr.
        """
        sys.stderr.write(str(request))

    #----------------------------------------------------------------------
    def onError(
            self,
            exceptionType,  # type: type
            exceptionValue,  # type: Exception
            tb,
            request  # type: Optional[Request]
    ):
        """
        Default handler for Python exceptions: writes the details to stderr
        and forwards the exception to sys.excepthook.
        :param request: the Request being processed when the error occurred,
            otherwise None
        """
        sys.stderr.write(
            self.exceptionDetail(exceptionType, exceptionValue, tb, request))
        sys.excepthook(exceptionType, exceptionValue, tb)

    #----------------------------------------------------------------------
    def exceptionDetail(
            self,
            exceptionType,  # type: type
            exceptionValue,  # type: Exception
            tb,
            request  # type: Optional[Request]
    ):
        """Format an exception and the related request as text."""
        text = "[{}]: Unhandled RestClient Error:{}\n".format(
            datetime.now().isoformat(), exceptionType)
        text += "request:{}\n".format(request)
        text += "Exception trace: \n"
        text += "".join(
            traceback.format_exception(
                exceptionType,
                exceptionValue,
                tb,
            ))
        return text

    #----------------------------------------------------------------------
    def _processRequest(self, request, session):
        # type: (Request, requests.Session)->None
        """
        Internal: send one request and dispatch its callbacks.
        """
        # noinspection PyBroadException
        try:
            request = self.sign(request)

            url = self.makeFullUrl(request.path)

            request.deliverDatetime = datetime.now()
            self._queueing_times.append(
                (request.deliverDatetime - request.createDatetime).total_seconds())

            response = session.request(request.method,
                                       url,
                                       headers=request.headers,
                                       params=request.params,
                                       data=request.data)
            request.response = response

            request.responseDatetime = datetime.now()
            self._response_times.append(
                (request.responseDatetime - request.deliverDatetime).total_seconds())

            httpStatusCode = response.status_code
            # Floor division: with "/" on Python 3 only status 200 would
            # compare equal to 2, but every 2xx counts as success.
            if httpStatusCode // 100 == 2:
                jsonBody = response.json()
                request.callback(jsonBody, request)
                request.status = RequestStatus.success
            else:
                request.status = RequestStatus.failed

                if request.onFailed:
                    jsonBody = response.json()
                    request.onFailed(jsonBody, request)
                else:
                    # The default handler only runs when no per-request
                    # handler was supplied, as documented on addRequest.
                    self.onFailed(httpStatusCode, request)
        except Exception:
            request.status = RequestStatus.error
            t, v, tb = sys.exc_info()
            if request.onError:
                request.onError(t, v, tb, request)
            else:
                self.onError(t, v, tb, request)

    #----------------------------------------------------------------------
    def makeFullUrl(self, path):
        """
        Turn a relative path into an absolute URL:
        eg: makeFullUrl('/get') == 'http://xxxxx/get'
        :param path:
        :return:
        """
        url = self.urlBase + path
        return url

    def getStatus(self):
        """
        Return basic runtime statistics: the current queue size and the
        average queueing/response time over the recorded requests (0 when
        nothing has been recorded yet).
        """
        return {
            "queueing_number": self._queue.qsize(),
            "avg_queueing_time": sum(self._queueing_times) / len(self._queueing_times)
            if len(self._queueing_times) else 0,
            "avg_response_time": sum(self._response_times) / len(self._response_times)
            if len(self._response_times) else 0
        }
def extract_torrents(provider, client):
    """ Main torrent extraction generator for non-API based providers

    Rows are located by evaluating the provider definition's search queries
    with ``eval``; those queries run with this function's locals in scope,
    so the local names must not be renamed.

    Args:
        provider  (str): Provider ID
        client (Client): Client class instance

    Yields:
        tuple: A torrent result ``(name, info_hash, torrent, size, seeds, peers)``
    """
    definition = definitions[provider]
    definition = get_alias(definition, get_setting("%s_alias" % provider))
    log.debug("[%s] Extracting torrents from %s using definitions: %s" % (provider, provider, repr(definition)))

    if not client.content:
        if get_setting("use_debug_parser", bool):
            log.debug("[%s] Parser debug | Page content is empty" % provider)

        # A plain return ends the generator; raising StopIteration inside a
        # generator is turned into RuntimeError by PEP 479.
        return

    dom = Html().feed(client.content)

    key_search = get_search_query(definition, "key")
    row_search = get_search_query(definition, "row")
    name_search = get_search_query(definition, "name")
    torrent_search = get_search_query(definition, "torrent")
    info_hash_search = get_search_query(definition, "infohash")
    size_search = get_search_query(definition, "size")
    seeds_search = get_search_query(definition, "seeds")
    peers_search = get_search_query(definition, "peers")
    referer_search = get_search_query(definition, "referer")

    log.debug("[%s] Parser: %s" % (provider, repr(definition['parser'])))

    q = Queue()
    threads = []
    needs_subpage = 'subpage' in definition and definition['subpage']

    if needs_subpage:
        def extract_subpage(q, name, torrent, size, seeds, peers, info_hash, referer):
            """Fetch the subpage of one result and put the final tuple on q."""
            try:
                log.debug("[%s] Getting subpage at %s" % (provider, repr(torrent)))
            except Exception as e:
                import traceback
                log.error("[%s] Subpage logging failed with: %s" % (provider, repr(e)))
                # Explicit loop: map() is lazy on Python 3 and would log nothing.
                for line in traceback.format_exc().split("\n"):
                    log.debug(line)

            # New client instance, otherwise it's race conditions all over the place
            subclient = Client()
            subclient.passkey = client.passkey
            headers = {}

            if "subpage_mode" in definition:
                if definition["subpage_mode"] == "xhr":
                    headers['X-Requested-With'] = 'XMLHttpRequest'
                    headers['Content-Language'] = ''

            if referer:
                headers['Referer'] = referer

            uri = torrent.split('|')  # Split cookies for private trackers
            subclient.open(py2_encode(uri[0]), headers=headers)

            if 'bittorrent' in subclient.headers.get('content-type', ''):
                log.debug('[%s] bittorrent content-type for %s' % (provider, repr(torrent)))
                if len(uri) > 1:  # Stick back cookies if needed
                    torrent = '%s|%s' % (torrent, uri[1])
            else:
                try:
                    torrent = extract_from_page(provider, subclient.content)
                    if torrent and not torrent.startswith('magnet') and len(uri) > 1:  # Stick back cookies if needed
                        torrent = '%s|%s' % (torrent, uri[1])
                except Exception as e:
                    import traceback
                    log.error("[%s] Subpage extraction for %s failed with: %s" % (provider, repr(uri[0]), repr(e)))
                    for line in traceback.format_exc().split("\n"):
                        log.debug(line)

            log.debug("[%s] Subpage torrent for %s: %s" % (provider, repr(uri[0]), torrent))
            ret = (name, info_hash, torrent, size, seeds, peers)
            q.put_nowait(ret)

    if not dom:
        if get_setting("use_debug_parser", bool):
            log.debug("[%s] Parser debug | Could not parse DOM from page content" % provider)

        return

    if get_setting("use_debug_parser", bool):
        log.debug("[%s] Parser debug | Page content: %s" % (provider, client.content.replace('\r', '').replace('\n', '')))

    # NOTE(review): the *_search strings come from the provider definition
    # and are executed with eval(); they must never be built from untrusted
    # input.
    key = eval(key_search) if key_search else ""
    if key_search and get_setting("use_debug_parser", bool):
        key_str = key.__str__()
        log.debug("[%s] Parser debug | Matched '%s' iteration for query '%s': %s" % (provider, 'key', key_search, key_str.replace('\r', '').replace('\n', '')))

    items = eval(row_search)
    if get_setting("use_debug_parser", bool):
        log.debug("[%s] Parser debug | Matched %d items for '%s' query '%s'" % (provider, len(items), 'row', row_search))

    for item in items:
        if get_setting("use_debug_parser", bool):
            item_str = item.__str__()
            log.debug("[%s] Parser debug | Matched '%s' iteration for query '%s': %s" % (provider, 'row', row_search, item_str.replace('\r', '').replace('\n', '')))

        if not item:
            continue

        try:
            name = eval(name_search) if name_search else ""
            torrent = eval(torrent_search) if torrent_search else ""
            size = eval(size_search) if size_search else ""
            seeds = eval(seeds_search) if seeds_search else ""
            peers = eval(peers_search) if peers_search else ""
            info_hash = eval(info_hash_search) if info_hash_search else ""
            referer = eval(referer_search) if referer_search else ""

            # Drop anything that precedes the magnet URI.
            if 'magnet:?' in torrent:
                torrent = torrent[torrent.find('magnet:?'):]

            if get_setting("use_debug_parser", bool):
                log.debug("[%s] Parser debug | Matched '%s' iteration for query '%s': %s" % (provider, 'name', name_search, name))
                log.debug("[%s] Parser debug | Matched '%s' iteration for query '%s': %s" % (provider, 'torrent', torrent_search, torrent))
                log.debug("[%s] Parser debug | Matched '%s' iteration for query '%s': %s" % (provider, 'size', size_search, size))
                log.debug("[%s] Parser debug | Matched '%s' iteration for query '%s': %s" % (provider, 'seeds', seeds_search, seeds))
                log.debug("[%s] Parser debug | Matched '%s' iteration for query '%s': %s" % (provider, 'peers', peers_search, peers))
                if info_hash_search:
                    log.debug("[%s] Parser debug | Matched '%s' iteration for query '%s': %s" % (provider, 'info_hash', info_hash_search, info_hash))
                if referer_search:
                    log.debug("[%s] Parser debug | Matched '%s' iteration for query '%s': %s" % (provider, 'referer', referer_search, referer))

            # Pass client cookies with torrent if private
            if not torrent.startswith('magnet'):
                user_agent = USER_AGENT

                if client.passkey:
                    torrent = torrent.replace('PASSKEY', client.passkey)
                elif client.token:
                    headers = {'Authorization': client.token, 'User-Agent': user_agent}
                    log.debug("[%s] Appending headers: %s" % (provider, repr(headers)))
                    torrent = append_headers(torrent, headers)
                    log.debug("[%s] Torrent with headers: %s" % (provider, repr(torrent)))
                else:
                    parsed_url = urlparse(torrent.split('|')[0])
                    cookie_domain = '{uri.netloc}'.format(uri=parsed_url)
                    cookie_domain = re.sub(r'www\d*\.', '', cookie_domain)
                    cookies = []
                    for cookie in client._cookies:
                        if cookie_domain in cookie.domain:
                            cookies.append(cookie)
                    headers = {}
                    if cookies:
                        headers = {'User-Agent': user_agent}
                        log.debug("[%s] Cookies res: %s / %s" % (provider, repr(headers), repr(client.request_headers)))
                        if client.request_headers:
                            headers.update(client.request_headers)
                        if client.url:
                            headers['Referer'] = client.url
                            headers['Origin'] = client.url
                        # Need to set Cookie afterwards to avoid rewriting it with session Cookies
                        headers['Cookie'] = ";".join(["%s=%s" % (c.name, c.value) for c in cookies])
                    else:
                        headers = {'User-Agent': user_agent}
                    torrent = append_headers(torrent, headers)

            if name and torrent and needs_subpage and not torrent.startswith('magnet'):
                if not torrent.startswith('http'):
                    torrent = definition['root_url'] + py2_encode(torrent)
                # Subpage threads are only created here; they are started
                # together once every row has been parsed.
                t = Thread(target=extract_subpage, args=(q, name, torrent, size, seeds, peers, info_hash, referer))
                threads.append(t)
            else:
                yield (name, info_hash, torrent, size, seeds, peers)
        except Exception as e:
            log.error("[%s] Got an exception while parsing results: %s" % (provider, repr(e)))

    if needs_subpage:
        log.debug("[%s] Starting subpage threads..." % provider)
        for t in threads:
            t.start()
        for t in threads:
            t.join()

        # All threads have finished, so the queue size is final.
        for i in range(q.qsize()):
            ret = q.get_nowait()
            log.debug("[%s] Queue %d got: %s" % (provider, i, repr(ret)))
            yield ret
class ScrapeWorker(Process):
    """Process that runs the phases of a scrape model one after another.

    For every active phase it spawns source workers, queues the phase's
    sources, parses whatever arrives on ``parse_q`` and hands the resulting
    objects to the store thread registered for them.
    """

    def __init__(self, model, dummy=False):
        super(ScrapeWorker, self).__init__()
        self.source_q = Queue()  # sources waiting to be fetched
        self.parse_q = Queue()  # sources waiting to be parsed
        self.seen = ScalableBloomFilter()
        self.forwarded = ScalableBloomFilter()
        self.new_sources = []
        self.workers = []
        self.to_forward = []
        self.parser = None
        self.done_parsing = False
        self.no_more_sources = False
        self.dbs = dict()  # template name -> store thread
        self.schedule = model.schedule
        self.model = model
        self.source_kill = None
        self.dummy = dummy

        db_threads = defaultdict(list)

        # Check if the functions in each template are used properly
        # and store which types of databases are needed.
        for phase in self.model.phases:
            for template in phase.templates:
                self.check_functions(template, phase)
                if template.db_type:
                    db_threads[template.db_type].append(template)

        # Start all the threads necessary for storing the data and give each
        # template a reference to the thread it needs to store data in.
        for thread, templates in db_threads.items():
            if not dummy:
                store_thread = databases._threads[thread]()
            else:
                store_thread = databases._threads['dummy']()

            for template in templates:
                self.dbs[template.name] = store_thread
            store_thread.start()

    def run(self):
        """Run every phase in order, then shut the store threads down."""
        # create the threads needed to scrape
        i = 0
        while i < len(self.model.phases):
            # if self.is_scheduled():
            phase = self.model.phases[i]
            print('running phase:', i, phase.name)

            # Check if the phase has a parser, if not, reuse the one from the
            # last phase.
            self.to_parse = 0
            self.parsed = 0

            if phase.active:
                self.spawn_workforce(phase)
                self.add_sources(phase)
                self.to_forward = []
                self.parse_sources()

            # A repeating phase keeps the same index and runs again.
            if not phase.repeat:
                i += 1

        # NOTE(review): self.dbs maps several template names to the same
        # store thread, so a shared thread receives one None per template --
        # confirm the store threads cope with repeated sentinels.
        for db in self.dbs.values():
            db.store_q.put(None)
        for db in self.dbs.values():
            db.store_q.join()
        print('Waiting for the database')
        print('Scraper fully stopped')

    def parse_sources(self):
        """Parse fetched sources until everything queued has been parsed.

        Stops when ``parsed`` reaches ``to_parse``, or when nothing arrives
        for 10 seconds while the source queue is empty.
        """
        while True:
            if self.to_parse == self.parsed:
                break
            try:
                source = self.parse_q.get(timeout=10)
            except Empty:
                if self.source_q.empty():
                    print('No more sources to parse at this point')
                    break
                # elif self.paused:
                #     time.sleep(self.get_sleep_time())
                else:
                    print('Waiting for sources to parse')
                    source = None

            if source is not None:
                self.seen.add(source.url)
                objects = self.parser.parse(source)
                self.parsed += 1

                for obj in objects:
                    if obj.db:
                        self.dbs[obj.name].store_q.put(obj)

                # Sources collected in self.new_sources are turned into
                # queue entries here, then the list is reset.
                for new_source in self.new_sources:
                    self._gen_source(*new_source)

                self.new_sources = []
                self.show_progress()

        print('Unparsed ', self.source_q.qsize())

    def spawn_workforce(self, phase):
        """Create the parser and the source workers for ``phase``."""
        # check if phase reuses the current source workforce
        if phase.parser:
            self.parser = phase.parser(parent=self, templates=phase.templates)
        elif not self.parser and not phase.parser:
            raise Exception('No parser was specified')
        else:
            # Reuse the parser class of the previous phase.
            parse_class = self.parser.__class__
            self.parser = parse_class(parent=self, templates=phase.templates)

        if phase.n_workers:
            n_workers = phase.n_workers
        else:
            n_workers = self.model.num_getters

        # Kill existing workers if there are any
        if self.workers:
            self.source_kill.set()

        # Create new Event to be able to kill the source workers
        self.source_kill = Event()
        self.workers = [
            phase.source_worker(parent=self, id=i,
                                stop_event=self.source_kill)
            for i in range(n_workers)
        ]
        for worker in self.workers:
            worker.start()

    def add_sources(self, phase):
        """Queue forwarded sources and the sources declared by ``phase``."""
        # A set gives O(1) membership tests in the loop below.
        urls_in_db = set()
        if phase.synchronize:
            urls_in_db = set(self.get_scraped_urls(phase))

        for source in self.to_forward:
            if source.url not in urls_in_db:
                self.source_q.put(source)
                self.to_parse += 1

        for source in phase.sources:
            if source.from_db:
                # NOTE(review): the result of this read is never used --
                # confirm whether the sources were meant to be queued.
                sources = self.dbs[source.from_db].read(source.from_db)
            if source.active:
                self.source_q.put(source)
                self.to_parse += 1

    def get_scraped_urls(self, phase):
        """Yield the stored URL value of every object of the phase's templates."""
        for template in phase.templates:
            if template.name in self.dbs:
                for objct in self.dbs[template.name].read(template):
                    if objct:
                        yield objct.attrs['url'].value

    def _gen_source(self, objct, attr):
        """Create and queue one new source per value of ``attr``."""
        for value in attr.value:
            # for now only "or" is supported.
            if not self._evaluate_condition(objct, attr):
                continue

            url = self._apply_src_template(attr.source, value)
            attrs = []
            if attr.source.copy_attrs:
                attrs_to_copy = attr.source.copy_attrs
                assert all(attr in objct.attrs for attr in attrs_to_copy)
                if isinstance(attrs_to_copy, dict):
                    # We store the copied attributes under different names.
                    # The loop variable must not be called ``value``: that
                    # would clobber the value handed to value_is_new below.
                    for key, new_name in attrs_to_copy.items():
                        attrs.append(objct.attrs[key](name=new_name))
                else:
                    for key in attrs_to_copy:
                        attrs.append(objct.attrs[key]())

            new_source = attr.source(url=url, attrs=attrs)

            if attr.attr_condition:
                if self.value_is_new(objct, value, attr.attr_condition):
                    self._add_source(new_source)
            else:
                self._add_source(new_source)

    def _add_source(self, source):
        """Queue ``source`` (active) or keep it for the next phase (inactive).

        Sources without a URL, already forwarded ones, and already seen ones
        (unless ``source.duplicate`` is set) are ignored.
        """
        if source.url and (source.url not in self.seen or source.duplicate) \
                and source.url not in self.forwarded:
            if source.active:
                self.to_parse += 1
                self.source_q.put(source)
                self.seen.add(source.url)
            else:
                self.to_forward.append(source)
                self.forwarded.add(source.url)

    def value_is_new(self, objct, uri, name):
        """Return True if the stored value of ``name`` differs from objct's."""
        # NOTE(review): self.db is not assigned anywhere in this class (only
        # self.dbs is) -- confirm where it is supposed to come from.
        db_objct = self.db.read(uri, objct)
        if db_objct and db_objct.attrs.get(name):
            if db_objct.attrs[name].value != objct.attrs[name].value:
                return True
        return False

    def _apply_src_template(self, source, url):
        """Format ``url`` into the source's ``src_template``, if it has one."""
        if source.src_template:
            # use formatting notation in the src_template
            return source.src_template.format(url)
        return url

    def _evaluate_condition(self, objct, attr, **kwargs):
        """Return False if any truthy attribute value fails its condition."""
        # TODO add "in", and other possibilities.
        if attr.source_condition:
            for name, cond in attr.source_condition.items():
                values = objct.attrs[name].value
                # Wrap the value in a list without for example seperating the
                # characters.
                values = values if isinstance(values, list) else [values]
                for val in values:
                    # SECURITY NOTE(review): ``val`` is an attribute value of
                    # a parsed object and is executed with eval(); empty
                    # globals/locals do not make that safe for untrusted
                    # input.  Consider an explicit comparison operator table.
                    if val and not eval(str(val) + cond, {}, {}):
                        return False
        return True

    def reset_source_queue(self):
        """Drain the source queue, marking every removed item as done."""
        while not self.source_q.empty():
            try:
                self.source_q.get(False)
            except Empty:
                continue
            self.source_q.task_done()

    def show_progress(self):
        """Print queue sizes and average fetch/parse times."""
        if not self.dummy:
            os.system('clear')
        info = '''
        Domain {}
        Sources to get: {}
        Sources to parse: {}
        Sources parsed: {}
        Average get time: {}s
        Average parse time: {}s
        '''
        # Guard both averages against division by zero (no workers yet, or
        # nothing parsed yet).
        if self.workers:
            get_average = sum(w.mean for w in self.workers) / len(self.workers)
        else:
            get_average = 0
        if self.parsed:
            parse_average = self.parser.total_time / self.parsed
        else:
            parse_average = 0
        print(
            info.format(self.name, self.source_q.qsize(), self.to_parse,
                        self.parsed, round(get_average, 3),
                        round(parse_average, 3)))

    def check_functions(self, template, phase):
        """Raise if the template uses functions the phase's parser lacks."""
        error_string = "One of these functions: {} is not implemented in {}."
        not_implemented = []

        for attr in template.attrs.values():
            for func in attr.func:
                if not getattr(phase.parser, func, False):
                    not_implemented.append(func)

        if not_implemented:
            # phase.parser is called like a class in spawn_workforce, so use
            # its own name; fall back to the type name for instances.
            parser_name = getattr(phase.parser, '__name__',
                                  phase.parser.__class__.__name__)
            raise Exception(
                error_string.format(str(not_implemented), parser_name))
def index(request):
    '''
    Render the monthly court-rental calendar page.

    The month and year to display are read from the ``month`` and ``year``
    query parameters.  A missing, empty, non-numeric or out-of-range value
    makes the view fall back to the current month instead of raising.

    One thread per entry of ``requestvenueId`` runs ``threadIndex`` and puts
    its result on a shared queue; the results are merged after all threads
    have been joined.

    res: [
        {"rentDate": yyyy-MM-dd, "venueName": xxxx,
         "rentTimePeriod": HH:mm:ss~HH:mm:ss ...},
        ...
    ]
    cal: one row per week, seven cells per row:
    [
        [
            {
                "date": 1,  # day 1 (0 marks a padding cell outside the month)
                "courts": [
                    {"rentDate": d, "venueName": xxxx,
                     "rentTimePeriod": HH:mm:ss~HH:mm:ss ...},
                    ...
                ]
            },
            ...
        ],
        ...
    ]
    '''
    # NOTE: the template context is ``locals()``, so every local name in this
    # function may be referenced by home/index.html -- do not rename them.
    now = datetime.now()
    try:
        requestMonth = int(request.GET.get('month') or now.month)
        requestYear = int(request.GET.get('year') or now.year)
        requestTime = datetime(requestYear, requestMonth, 1)
    except (ValueError, OverflowError):
        # Bad user input (e.g. ?month=abc or ?month=13): show the current
        # month rather than failing the whole request.
        requestMonth, requestYear = now.month, now.year
        requestTime = datetime(requestYear, requestMonth, 1)
    currentYear = now.year  ## for copyright year

    ## weekday of the 1st (0 = Monday) and number of days in the month
    weekdayS, days = calendar.monthrange(requestTime.year, requestTime.month)

    requestDateS = requestTime.strftime("%Y-%m-%d")  ## yyyy-MM-dd
    requestDateE = datetime(
        requestTime.year, requestTime.month, days).strftime(
            "%Y-%m-%d")  ## yyyy-MM-dd

    monthList = list(range(1, 13))
    monthselect = [""] * 12
    monthselect[requestMonth - 1] = "selected"

    res = []
    isDrawn = True
    q = Queue()
    threads = []
    for court in requestvenueId:
        key = {
            'rentDateS': requestDateS,
            'rentDateE': requestDateE,
            'venueId': court
        }
        t = threading.Thread(target=threadIndex, args=(q, key))
        t.start()
        threads.append(t)
    for thread in threads:
        thread.join()

    # All producers have finished, so qsize() is exact here.
    for _ in range(q.qsize()):
        data = q.get()
        res += data['res']
        isDrawn &= data['isDrawn']
    res.sort(key=lambda s: s['rentDate'])

    ## calendar
    weeks = math.ceil((weekdayS + days) / 7)
    cal = [[{"date": 0, "courts": []} for _ in range(7)] for _ in range(weeks)]
    for i in range(1, days + 1):
        cal[(i + weekdayS - 1) // 7][(i + weekdayS - 1) % 7] = {
            "date": i,
            "courts": []
        }

    ## add court to calendar
    for i in res:
        # The day of the month is the last two characters of yyyy-MM-dd.
        date = int(i['rentDate'][-2:])
        cal[(date + weekdayS - 1) // 7][(date + weekdayS - 1) % 7]['courts'].append(i)

    return render(request, 'home/index.html', locals())
class DouBanMovieSpider(object):
    """Multi-threaded crawler that walks Douban movie ids.

    Ids waiting to be fetched live in ``self.queue``; every id that has ever
    been queued is remembered in ``self.movie_id_in_queue`` so it is queued at
    most once.  Worker threads run :meth:`scratch_movie_info`.
    """

    def __init__(self, config, start_id):
        """Read settings from *config* and seed the work queue with *start_id*."""
        # a dict of cookies
        self.cookies = None
        self.thread_count = int(config['spider']['thread_count'])
        self.network_max_try_times = int(config['network']['max_try_times'])
        self.movie_id_in_queue = set()
        self.queue = Queue()
        self.queue.put(start_id)
        self.movie_id_in_queue.add(start_id)
        self.logger = Loggers.get_logger(config)
        self.db_helper = DbHelper.DbHelper()
        self.login_if_necessary(config)
        self.proxy = proxy.AbuyunProxy(config)
        # store_lock guards movie_id_in_queue; db_lock serialises DB inserts.
        self.store_lock = threading.Lock()
        self.db_lock = threading.Lock()
        self.thread_list = []

    def movie_exist_in_db(self, id):
        """Return whatever ``db_helper.is_movie_id_exists`` reports for *id*."""
        return self.db_helper.is_movie_id_exists(id)

    def start(self):
        """Start ``thread_count`` worker threads named ``spider thread <n>``."""
        for i_thread in range(self.thread_count):
            t = threading.Thread(target=self.scratch_movie_info,
                                 name='spider thread %d' % i_thread)
            self.thread_list.append(t)
            t.start()

    def join(self):
        """Block until every started worker thread has finished."""
        for t in self.thread_list:
            t.join()

    def scratch_movie_info(self):
        """Worker loop: fetch queued ids, store movies, queue newly found ids.

        The loop ends when no id could be taken from the queue for 20 seconds.
        """
        while True:
            self.logger.debug("Current queue length : " + str(self.queue.qsize()))
            try:
                id_scratch = self.queue.get(timeout=20)
            except Exception:
                # Normally the queue's "empty" timeout.  Narrowed from a bare
                # ``except`` so KeyboardInterrupt/SystemExit are not swallowed.
                self.logger.warning('queue empty, exist thread: %s' % threading.current_thread().name)
                break

            self.logger.debug("Scratch from id : %s in thread : %s" % (id_scratch, threading.current_thread().name))

            movie = self.get_movie_by_id(id_scratch)
            if not movie:
                self.logger.debug('did not get info from this movie(id=%s)' % id_scratch)
                # NOTE(review): a failing id is re-queued unconditionally, so
                # an id that can never be fetched is retried forever.
                self.queue.put(id_scratch)
                if not self.proxy.enable:
                    Utils.Utils.delay(constants.DELAY_MIN_SECOND, constants.DELAY_MAX_SECOND)
                continue

            next_movie_ids = movie.get('next_movie_ids', [])
            if next_movie_ids:
                # ``with`` releases the lock even if logging/queueing raises.
                with self.store_lock:
                    for mid in next_movie_ids:
                        if mid in self.movie_id_in_queue:
                            continue
                        self.logger.debug('add %s to queue(len=%d)' % (mid, len(self.movie_id_in_queue)))
                        self.movie_id_in_queue.add(mid)
                        self.queue.put(mid)

            with self.db_lock:
                self.db_helper.insert_movie(movie)

            # if proxy is enable , we won't wait for seconds because the proxy
            # will change IP very frequently
            if not self.proxy.enable:
                Utils.Utils.delay(constants.DELAY_MIN_SECOND, constants.DELAY_MAX_SECOND)

    def get_movie_by_id(self, id):
        """Fetch and parse the movie page for *id*.

        The request is repeated (with a 1-5 s delay between attempts) while it
        raises ``IOError`` or yields a falsy response; after
        ``network_max_try_times + 1`` failed attempts ``None`` is returned.

        Returns the dict produced by the movie parser with ``douban_id`` set
        to *id*, or the parser's falsy result when nothing could be extracted.
        """
        # change headers every time, it seems unnecessary when we use proxy
        headers = {'User-Agent': random.choice(constants.USER_AGENT)}

        # Fetch the Douban page (API) data
        r = None
        try_times = 0
        while not r:
            try_times += 1
            try:
                r = requests.get(
                    constants.URL_PREFIX + str(id),
                    headers=headers,
                    cookies=self.cookies,
                    proxies=self.proxy.get()
                )
            except IOError as e:
                # there must be some troubles with the network
                self.logger.warning('request exception : %s' % str(e))
            if not r:
                if try_times > self.network_max_try_times:
                    self.logger.error('Cannot get movie(id=%s) info' % id)
                    return None
                # wait seconds before the next attempt
                Utils.Utils.delay(1, 5)

        r.encoding = 'utf-8'

        # Extract the movie data
        movie_parser = MovieParser.MovieParser()
        movie_parser.set_html_doc(r.text)
        movie = movie_parser.extract_movie_info()
        if movie:
            movie['douban_id'] = id
        return movie

    def login_if_necessary(self, config):
        """Obtain login cookies when ``config['login_douban']['enable']`` is 1."""
        login_enable = int(config['login_douban']['enable']) == 1
        if not login_enable:
            self.logger.debug('douban login enable : %s' % str(login_enable))
            return

        cookie_helper = CookiesHelper.CookiesHelper(
            config['douban']['user'],
            config['douban']['password']
        )
        # Simulate a user login
        self.cookies = cookie_helper.get_cookies()
        self.logger.debug('cookies: %s' % str(self.cookies))
def run():
    """Expand letter-guess states and append one JSON line per state to ``out``.

    A state is a pair ``(hits, fails)``: ``hits`` holds ``(position, letter)``
    pairs already fixed, ``fails`` holds letters known to be absent.  States
    wait in two queues and the fail queue is always drained first.  For every
    state three queries are run: the count/score total of the matching words,
    the not-yet-placed letter with the highest summed score, and the positions
    at which that letter occurs.  The state, the totals, the chosen letter and
    its positions are written to ``out``; then one new hit state per position
    row and one new fail state are queued.

    Pending states are optionally pre-loaded from ``qs.json`` (a two element
    list ``[hit_states, fail_states]``); without it the search starts from the
    empty state.
    """
    # One ``w.l<pos>=<letter>`` predicate is appended per fixed hit.
    position_filter = ' AND w.l%s=%s'

    # Query text is loop-invariant; the literals are split over several lines
    # but concatenate to exactly the original statements.
    score_sql = (
        ' SELECT COUNT(*), SUM(score) FROM words w'
        ' LEFT JOIN letters l ON l.letter = ANY(%s) AND l.word = w.id'
        ' WHERE l.word IS NULL '
    )
    search_sql_head = (
        ' SELECT st1.letter, COUNT(*), SUM(st1.score) AS s FROM'
        ' ( SELECT l3.letter, w.score, w.id FROM words w'
        ' LEFT JOIN letters l2 ON w.id = l2.word AND l2.letter = ANY(%s)'
        ' INNER JOIN letters l3 ON l3.word = w.id'
        ' WHERE l2.word IS NULL AND l3.letter <> ALL(%s) '
    )
    # (disabled filter kept from the original: WHERE NOT (st1.letter ~ '[^A-Za-z]'))
    search_sql_tail = ') st1 GROUP BY st1.letter ORDER BY s DESC LIMIT 1'
    assemble_sql_head = (
        ' SELECT COUNT(*), l1.pos FROM words w'
        ' LEFT JOIN letter_agg l ON l.letter = ANY(%s) AND l.word = w.id'
        ' INNER JOIN letter_agg l1 ON l1.word = w.id'
        ' WHERE l.letter IS NULL AND l1.letter = %s '
    )
    assemble_sql_tail = ' GROUP BY l1.pos'

    def restore(state):
        """Rebuild a ``(hits, fails)`` state from its JSON (nested list) form."""
        # Positions >= 30 are dropped, as the original did for hit states.
        return (V(*[tuple(h) for h in state[0] if h[0] < 30]), V(*state[1]))

    with pg.connect('dbname=ng_am user=ng_am password=ng_am') as conn, open('out', 'w') as outf:
        cur = conn.cursor()
        hitQ = Queue()
        failQ = Queue()
        try:
            with open('qs.json', 'r') as qf:
                hitL, failL = json.load(qf)
                for hit in hitL:
                    hitQ.put(restore(hit))
                for fail in failL:
                    # Previously the raw JSON lists were queued here.  The
                    # fail push at the bottom of the loop needs
                    # ``fails.append`` to RETURN the extended collection,
                    # which a plain list does not (it returns None).
                    # NOTE(review): assumes V builds an immutable vector whose
                    # append() returns a new vector -- confirm.
                    failQ.put(restore(fail))
        except FileNotFoundError:
            pass
        if hitQ.qsize() == 0:
            hitQ.put((V(), V()))

        # for each prefix I need to get the total score of all the remaining
        # possibilities and the scores of each of those as well, then break
        # those into grouped letter placements. These will likely be very big
        # so I would want to put these into the database if I can
        # it's important to track the number of failures to get to that point
        # too, so the state seems to be:
        #   set of failed guesses
        #   pos-letter combinations
        #
        # from a given guess, we split into all of the new positions and task
        # them to count for each sub. But the distribution is just meant for
        # making decisions on the next letter to choose: the actual
        # probability of being wrong is given from the score distribution of
        # the terms that don't have the next chosen letter. Note we have to
        # recalculate the distributions updating the arrays separately, the
        # hit array and the fail array: these are the two dimensions. And each
        # node yields two outcomes, one for success and one for failure to the
        # next most likely letter
        # That makes sense. To queue for BFS I may have to just be prudent in
        # how I push into the queue, just to make sure I go along the failures
        # before I go along the axis of successes. Also note that if I do want
        # to just make conditional probabilities in the DB then I'll have to
        # include the failure probabilities relative to the parent. And I
        # guess the number of failures as well. Or I can just index them
        # properly. Each stores the index relative to the parent letters, as
        # well as the configuration? This is also unique, which makes it hard
        # to store into a DB other than as a JSON object since it's a list of
        # tuples. The BFS will be dominated by the last layer, I may just hold
        # this in memory until I find a good way to represent it on disk,
        # maybe even just as a flat JSON.
        # I need to know the probability of the match failing which happens if
        # we choose something that doesn't contain the most likely letter,
        # which is distinct from the score of the words with the most likely
        # letter: I need to get the sum of the scores of all the words so I
        # can do the universal difference/ratio
        while not (hitQ.empty() and failQ.empty()):
            # Fail states take priority; fall back to the hit queue.
            try:
                q = failQ.get_nowait()
            except Empty:
                q = hitQ.get()
            hits, fails = q
            print(q)

            # [(pos, letter), ...] -> [pos, letter, pos, letter, ...], the
            # parameter order expected by the repeated position filter.
            flat_hits = [s for hit in hits for s in hit]
            filters = position_filter * len(hits)

            sys.stdout.write('Scoring\r')
            cur.execute(score_sql + filters,
                        tuple([list(fails)] + flat_hits))
            scorer = cur.fetchone()
            num = scorer[0]
            if num <= 0:
                continue  # no word matches this state: dead end

            sys.stdout.write('Searching\r')
            cur.execute(
                search_sql_head + filters + search_sql_tail,
                tuple([list(fails), list(set(hit[1] for hit in hits))] + flat_hits)
            )
            # (disabled filter kept from the original: AND w.length = %s)
            nextr = cur.fetchone()
            if nextr is None:
                continue  # every remaining letter is already placed
            next_letter = nextr[0]

            sys.stdout.write('Assembling\r')
            cur.execute(
                assemble_sql_head + filters + assemble_sql_tail,
                tuple([list(fails)] + [next_letter] + flat_hits)
            )
            nexts = cur.fetchall()

            json.dump([flat_hits, list(fails), scorer, nextr, nexts], outf,
                      cls=DecimalEncoder)
            outf.write('\n')
            outf.flush()

            # One success state per position row, then the single failure
            # state in which next_letter is known to be absent.
            for n in nexts:
                hitQ.put((hits + [(n_, next_letter) for n_ in n[1] if n_ < 30], fails))
            failQ.put((hits, fails.append(next_letter)))
class PooledConnection(object):
    """Connection pool.

    Idle connections are parked in a queue; ``_count`` tracks how many
    connections currently exist (idle or handed out) and is capped at
    ``max_count``.  At most ``min_free_count`` idle connections are kept;
    any further released connection is closed.
    """

    def __init__(self, connection_strings, max_count=10, min_free_count=1, monitor_log=False):
        """
        :param connection_strings: mapping read with ``.get`` for the keys
            ``host``, ``port``, ``user``, ``password`` and ``database``.
        :param max_count: maximum number of live connections.
        :param min_free_count: number of idle connections kept for reuse.
        :param monitor_log: when true, log the pool state once shortly after
            construction.
        """
        self._max_count = max_count
        self._min_free_count = min_free_count
        self._connection_strings = connection_strings
        self._count = 0
        self._queue = Queue(max_count)
        self._lock = threading.Lock()
        if monitor_log:
            self._run_monitor()

    def __del__(self):
        # Close every idle connection that is still parked in the queue.
        while not self._queue.empty():
            conn = self._queue.get()
            if conn:
                self._close_connection(conn)
            else:
                break

    def _run_monitor(self):
        """Log the pool state once, five seconds from now.

        The timer is a daemon and is not joined, so constructing the pool no
        longer blocks for five seconds and a pending timer never keeps the
        interpreter alive.
        """
        def process(p):
            log.info('pool connection state:pid:%s, max_count:%s,min_free_count:%s,count:%s,free_count:%s' %
                     (os.getpid(), p._max_count, p._min_free_count, p._count, p._queue.qsize()))

        t = threading.Timer(5.0, process, args=(self,))
        t.daemon = True
        t.start()

    def _create_connection(self, autoCommit=True):
        """Open a new connection; raises PoolError when the pool is full.

        Callers are expected to hold ``self._lock`` (``get_connection`` does).
        """
        if self._count >= self._max_count:
            raise PoolError('The maximum number of connections beyond!')
        conn = Connection(self, host=self._connection_strings.get('host'),
                          port=self._connection_strings.get('port'),
                          user=self._connection_strings.get('user'),
                          password=self._connection_strings.get('password'),
                          db=self._connection_strings.get('database'),
                          charset='utf8',
                          autocommit=autoCommit,
                          cursorclass=pymysql.cursors.DictCursor)
        self._count += 1
        return conn

    def release_connection(self, connection):
        """Release a connection: park it for reuse, or close it when enough
        idle connections are already parked."""
        # ``with`` guarantees the lock is released even if closing or
        # queueing raises; otherwise every later caller would deadlock.
        with self._lock:
            if self._queue.qsize() >= self._min_free_count:
                self._close_connection(connection)
            else:
                self._queue.put(connection)

    def get_connection(self, timeout=15):
        """Get a connection.

        Reuses an idle connection or creates a new one.  When the pool is
        exhausted, polls every 0.2 s for up to *timeout* seconds (a falsy
        *timeout* means no waiting).

        :raises PoolError: if no connection became available in time.
        """
        bt = datetime.datetime.now()

        def get_conn():
            with self._lock:
                if not self._queue.empty():
                    return self._queue.get()
                if self._count < self._max_count:
                    return self._create_connection()
                return None

        conn = get_conn()
        if conn:
            return conn

        if timeout:
            # total_seconds() instead of .seconds: .seconds is only the
            # truncated seconds *component* (wrong for fractional timeouts
            # and wraps around after one day).
            while (datetime.datetime.now() - bt).total_seconds() < timeout:
                conn = get_conn()
                if conn:
                    break
                time.sleep(0.2)
        if not conn:
            raise PoolError('Timeout!There has no enough connection to be used!')
        return conn

    def _close_connection(self, connection):
        """Close a connection (best effort) and update the live count."""
        try:
            if connection._close():
                self._count -= 1
        except Exception:
            # Closing is best effort: a connection that fails to close must
            # not break release/cleanup.  Narrowed from a bare ``except`` so
            # KeyboardInterrupt/SystemExit still propagate.
            pass
def execute_jobs(jobs, show_progress=False, number_of_workers=10, debug_jobs=False):
    """Run *jobs* on a pool of worker threads and return their results.

    :param jobs: list of job dicts; each has a ``'client'`` with a ``path``
        and may have a ``'depends'`` set of paths.
    :param show_progress: with more than one job, write one character per
        finished job (``s`` for a ``NotImplemented`` return code, ``E`` for a
        truthy return code, ``.`` otherwise).
    :param number_of_workers: upper bound on the number of worker threads.
    :param debug_jobs: enable debug logging of job scheduling.
    :returns: list of result dicts (each updated with its job), in completion
        order.
    :raises AssertionError: if jobs are still pending but none is running,
        i.e. the remaining dependencies can never be satisfied.
    """
    global windows_force_posix
    from vcstool.streams import stdout

    if debug_jobs:
        logger.setLevel(logging.DEBUG)
        if windows_force_posix:
            logger.debug('force POSIX paths on Windows')

    results = []

    job_queue = Queue()
    result_queue = Queue()

    # create worker threads
    workers = [
        Worker(job_queue, result_queue)
        for _ in range(min(number_of_workers, len(jobs)))
    ]

    # fill job_queue with jobs for each worker
    pending_jobs = list(jobs)
    running_job_paths = []
    _enqueue_ready_jobs(job_queue, workers, pending_jobs, running_job_paths)
    logger.debug('ongoing %s' % running_job_paths)

    # start all workers
    for w in workers:
        w.start()

    # collect results
    while len(results) < len(jobs):
        (job, result) = result_queue.get()
        logger.debug("finished '%s'" % job['client'].path)
        running_job_paths.remove(result['job']['client'].path)
        if show_progress and len(jobs) > 1:
            if result['returncode'] is NotImplemented:
                stdout.write('s')
            elif result['returncode']:
                stdout.write('E')
            else:
                stdout.write('.')
            if debug_jobs:
                stdout.write('\n')
            stdout.flush()
        result.update(job)
        results.append(result)
        if pending_jobs:
            # The finished job no longer blocks anything that depended on it.
            for pending_job in pending_jobs:
                pending_job.get('depends', set()).discard(job['client'].path)
            _enqueue_ready_jobs(
                job_queue, workers, pending_jobs, running_job_paths)
            # Pending work but nothing running means no job can ever become
            # ready; fail instead of blocking forever on result_queue.
            assert running_job_paths, \
                'pending jobs have dependencies which can not be satisfied'
        if running_job_paths:
            logger.debug('ongoing ' + str(running_job_paths))
    if show_progress and len(jobs) > 1 and not debug_jobs:
        print('', file=stdout)  # finish progress line

    # join all workers
    for w in workers:
        w.done = True
    for w in workers:
        w.join()
    return results


def _enqueue_ready_jobs(job_queue, workers, pending_jobs, running_job_paths):
    """Queue ready jobs until one job per worker is queued or none is ready.

    Each queued job's client path is appended to *running_job_paths*.
    """
    while job_queue.qsize() < len(workers):
        # presumably get_ready_job also removes the job from pending_jobs --
        # verify against its definition
        job = get_ready_job(pending_jobs)
        if not job:
            break
        running_job_paths.append(job['client'].path)
        logger.debug("started '%s'" % job['client'].path)
        job_queue.put(job)
class Batcher:
    """Produce ``Batch`` objects on background threads.

    One daemon thread turns the records yielded by ``sample_generator`` into
    ``Example`` objects (``example_queue``); a second daemon thread groups
    those into ``Batch`` objects (``batch_queue``) that ``next_batch`` hands
    out.  ``single_pass`` is true when ``hps.mode`` contains ``'eval'``.
    """

    def __init__(self, vocab, bin_path, hps):
        """Store the settings and start both filler threads.

        :param vocab: passed through to ``Example`` and ``Batch``.
        :param bin_path: existing data file; its name selects the record
            layout (``'nli'`` or ``'main'``).
        :param hps: hyper-parameters; ``mode`` and ``batch_size`` are read.
        """
        assert os.path.exists(bin_path)
        self.vocab = vocab
        self.bin_path = bin_path
        # bin_fname = args.split_data_path.format(setname).replace('.json', '.bin')
        self.hps = hps
        self.single_pass = 'eval' in hps.mode

        QUEUE_MAX_SIZE = 50
        # Number of batches assembled (and shuffled) together when training.
        self.batch_cache_size = 50
        self.batch_queue = Queue(QUEUE_MAX_SIZE)
        # 16 is hard-coded here (the author left ``self.hps.batch_size``
        # commented out as the multiplier).
        self.example_queue = Queue(QUEUE_MAX_SIZE * 16)

        self.example_thread = Thread(target=self.fill_example_queue)
        self.example_thread.daemon = True
        self.example_thread.start()

        self.batch_thread = Thread(target=self.fill_batch_queue)
        self.batch_thread.daemon = True
        self.batch_thread.start()

    def next_batch(self):
        """Return the next queued batch.

        With an empty batch queue this returns the string ``'FINISH'`` in
        single-pass mode and raises ``ValueError`` otherwise.
        NOTE(review): emptiness is checked without waiting, so a filler
        thread that is merely slow is indistinguishable from one that has
        finished -- confirm callers tolerate this.
        """
        if self.batch_queue.empty():
            if self.single_pass:
                print("[*]FINISH decoding")
                return 'FINISH'
            print("Batch queue is empty. waiting....")
            raise ValueError("Unexpected finish of batching.")
        return self.batch_queue.get()

    def fill_example_queue(self):
        """Thread target: convert generator records into queued ``Example``s.

        Stops on the first exception, which includes the generator being
        exhausted; that is only expected in single-pass mode (asserted).
        """
        gen = sample_generator(self.bin_path, self.single_pass)
        while True:
            try:
                if 'nli' in self.bin_path:
                    premise, hypo, label = next(gen)
                    example = Example(premise, hypo, label, None, None,
                                      self.vocab, self.hps)
                elif 'main' in self.bin_path:
                    w0, w1, claim, reason, label = next(gen)
                    example = Example(w0, w1, label, claim, reason,
                                      self.vocab, self.hps)
                else:
                    raise ValueError
            except Exception:
                print("Error while fill example queue: {}".format(self.example_queue.qsize()))
                assert self.single_pass
                break
            self.example_queue.put(example)

    def fill_batch_queue(self):
        """Thread target: group queued examples into ``Batch`` objects.

        Training mode: collect ``batch_size * batch_cache_size`` examples,
        split them into batches, shuffle the batches and queue them.
        Single-pass mode: each example is repeated ``batch_size`` times to
        form its own batch.
        """
        batch_size = self.hps.batch_size
        while True:
            if not self.single_pass:
                assert 'eval' not in self.hps.mode
                inputs = [self.example_queue.get()
                          for _ in range(batch_size * self.batch_cache_size)]
                batches = [inputs[idx:idx + batch_size]
                           for idx in range(0, len(inputs), batch_size)]
                shuffle(batches)
                for bat in batches:
                    self.batch_queue.put(Batch(bat, self.hps, self.vocab))
            else:
                assert 'eval' in self.hps.mode
                sample = self.example_queue.get()
                bat = [sample] * batch_size
                self.batch_queue.put(Batch(bat, self.hps, self.vocab))