def _schedule_processes(self, tasklist, _worker):
        # Reset the flag that tells running subprocesses to stop; it lives on
        # _subprocess_container so that worker code can see it.
        _subprocess_container.stop_all = False
        # Make a shallow copy of the task list,
        # so we don't mess with the caller's list.
        tasklist = copy.copy(tasklist)
        number_tasks = len(tasklist)
        if number_tasks == 0:
            totaltime = 0
            return totaltime
        use_threading = number_tasks > 1 and self.num_processes > 1
        starttime = time.process_time()
        task_queue = Queue()
        pbar = _ProgressBar(number_tasks, self.silent)
        pbar.animate(0)
        processed_tasks = []
        n_errors = 0
        threads = []
        try:
            # run while there are still threads, tasks, or results in the
            # queue left to process
            while threads or tasklist or task_queue.qsize():
                # if we aren't using all the processors AND there is still
                # data left to compute, then spawn another thread
                if (len(threads) < self.num_processes) and tasklist:
                    if use_threading:
                        t = Thread(
                            target=_worker, args=tuple([tasklist.pop(0), task_queue])
                        )
                        t.daemon = True
                        t.start()
                        threads.append(t)
                    else:
                        _worker(tasklist.pop(0), task_queue)
                else:
                    # If we have the maximum number of running threads or have
                    # run out of tasks, drop any threads that are done.
                    threads = [thread for thread in threads if thread.is_alive()]
                while task_queue.qsize():
                    task = task_queue.get()
                    if task.has_error():
                        n_errors += 1
                    self.summery.task_summery(task)
                    processed_tasks.append(task)
                    pbar.animate(len(processed_tasks), n_errors)

                time.sleep(0.01)
        except KeyboardInterrupt:
            _display("Processing interrupted")
            _subprocess_container.stop_all = True
            # Add a small delay here. It allows the user to press ctrl-c twice
            # to escape this try-except. This is useful if the code is run in
            # an outer loop which we want to escape as well.
            time.sleep(1)
        totaltime = time.process_time() - starttime
        return totaltime
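For reference, the _worker callable passed in is expected to process one task and put it back on the result queue so it can be counted and summarised. A minimal hedged sketch (the task's run() and error-recording behaviour are assumptions, not part of the scheduler above):

from queue import Queue

def _example_worker(task, result_queue: Queue):
    # Hypothetical worker: run the task, then report it back on the queue.
    try:
        task.run()          # assumed method; the task records its own errors
    except Exception:
        pass                # has_error() is expected to reflect any failure
    result_queue.put(task)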
Example #2
    def _cost_limited_dfs(self):
        """Run the DFS from a node

        Returns:
            (board, next_min)
            Board can either be a board or None
        """
        frontiers_list = Queue()
        frontiers_list.put(self.start_state)
        next_min = inf
        while True:
            if frontiers_list.empty():
                return (None, next_min)

            # Take the next board off the frontier queue
            board = frontiers_list.get()
            cost = self._get_cost(board)

            # If the cost is less than the cutoff, we can continue
            if cost <= self.cut_off:
                if board.num_pegs == 1:
                    return (board, next_min)
                for index, move in enumerate(board.moves):
                    next_board = board.execute_move(index)
                    self.num_visited += 1
                    frontiers_list.put(next_board)
                    if self.max_space < frontiers_list.qsize():
                        self.max_space = frontiers_list.qsize()

            else:
                if cost < next_min:
                    next_min = cost
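Since queue.Queue is FIFO, the frontier above is actually expanded in breadth-first order; if strict depth-first order is wanted, a LifoQueue can be dropped in with no other changes. A hedged sketch, not part of the original code:

from queue import LifoQueue

# LifoQueue exposes the same put/get/qsize/empty interface as Queue, but is
# last-in-first-out, so the search would expand nodes in true DFS order.
frontiers_list = LifoQueue()
frontiers_list.put("start_state")  # placeholder for self.start_state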
Example #3
class ProxyManager(object):

    def __init__(self):
        self.is_alive = True 
        self.proxies = Queue()
        self.scraper = Scraper()
        self.bad_proxies = BadProxies()

    def collect(self):
        while self.is_alive:
            if not self.proxies.qsize():

                for proxy in self.scraper.proxies:
                    if proxy not in self.bad_proxies:
                        self.proxies.put(proxy)
                        
            sleep(0.5)

    def bad_proxy(self, proxy):
        if proxy not in self.bad_proxies:
            self.bad_proxies.append(proxy)
            
    def get_proxy(self):
        if self.proxies.qsize():
            return self.proxies.get()
    
    def start(self):
        self.collect()
            
    def stop(self):
        self.is_alive = False 
        self.scraper.is_alive = False 
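A hedged usage sketch, assuming Scraper and BadProxies come from the surrounding project: run the collector on a background thread and pull proxies from the manager as needed.

from threading import Thread
from time import sleep

manager = ProxyManager()
Thread(target=manager.start, daemon=True).start()  # collect() loops until stop()
sleep(2)                      # give the scraper a moment to fill the queue
proxy = manager.get_proxy()   # returns None while the queue is still empty
if proxy is not None:
    print("using proxy", proxy)
manager.stop()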
Example #4
class Master(threading.Thread):
    def __init__(self):
        super().__init__()
        self.daemon = True
        self.que = Queue()
        self.conn = create_engine("postgresql://gaoyb7@localhost/dht_demo").connect()
        self.ins = hash_tab.insert()

    def log_in_database_demo(self, infohash, name):
        try:
            self.conn.execute(self.ins, infohash=infohash, name=name)
        except Exception as e:
            pass

    def logger(self):
        while True:
            if self.que.empty():
                sleep(1)
                continue
            else:
                r = self.que.get()
                self.log_in_database_demo(r[1], r[2])

    def run(self):
        #while True:
        #    self.fetch()
        dt = threading.Thread(target=self.logger)
        dt.daemon = True
        dt.start()
        while True:
            if threading.active_count() < 1500:
                if self.que.qsize() == 0:
                    sleep(1)
                    continue
                r = self.que.get()
                t = threading.Thread(target=fetch_metadata, args=(r[0], r[1], r[2]))
                t.daemon = True
                t.start()
            else:
                sleep(1)

    def fetch(self):
        for i in range(100):
            if self.que.qsize() == 0:
                sleep(1)
                continue
            r = self.que.get()
            t = threading.Thread(target=fetch_metadata, args=(r[0], r[1], r[2]))
            t.daemon = True
            t.start()

    def log(self, nid, infohash, name, address):
        #print("%s %s" % (codecs.encode(infohash, "hex_codec").decode(), name.decode("utf-8")))
        #fetch_metadata(nid, infohash, address)
        #print(self.que.qsize())
        if self.que.qsize() > 5000:
            sleep(1)
        self.que.put([nid, codecs.encode(infohash, "hex_codec").decode(), name.decode()])
Example #5
class Fetcher:
    def __init__(self,threads,subject):
        self.opener = urllib.request.build_opener(urllib.request.HTTPHandler)
        self.lock = Lock()
        self.q_req = Queue()
        self.q_ans = Queue()
        self.threads = threads
        self.subject = subject
        for i in range(threads):
            t = Thread(target=self.threadget, args=(subject,))
            t.daemon = True
            t.start()
        self.running = 0

    def __del__(self):
        time.sleep(0.5)
        self.q_req.join()
        self.q_ans.join()

    def taskleft(self):
        return self.q_req.qsize()+self.q_ans.qsize()+self.running

    def push(self, req):
        self.q_req.put(req)

    def pop(self, ans):
        return self.q_ans.get()

    def download_imag(self, subject):
        global count
        s = requests.session()
        imag = s.get(subject['cover'])
        name = subject['title']
        path = '/users/peibibing/PycharmProjects/douban/douban_movie/%s.jpg'%name
        with open(path,'wb') as f:
            f.write(imag.content)
        count += 1
        print(count)
        return 'ok'

    def threadget(self,sub):
        while True:
            req = self.q_req.get()
            with self.lock:  # make the counter update atomic
                self.running += 1
            try:
                # ans = download_imag(sub)

                ans = self.opener.open(req).read()
            except Exception:
                ans = 'error'
                print(ans)
            self.q_ans.put((req,ans))
            with self.lock:
                self.running -= 1
            self.q_req.task_done()
            time.sleep(0.1)
Example #6
File: pingit.py Project: binhvq/nms
class PingThem():
    def __init__(self, targets, maxthreads=100):
        self.q1 = Queue(maxsize=0)
        self.q2 = Queue(maxsize=0)
        self.maxthreads = maxthreads if len(targets) >= maxthreads else len(targets)
        

        for target in targets:
            self.q1.put(target)
        logging.info("Done adding all targets")

        print(self.q1.qsize())


    def worker(self):
        while 1:
            i = self.q1.get()
            # logging.info("Got value from queue: {0}".format(i))
            # quit cond
            if i is None:
                break

            p = PingIt()
            r = p.doping(i)

            self.q2.put(r)

            self.q1.task_done()

    def run(self):
        print("Will start {0} threads for checking ...".format(self.maxthreads))
        allts = []
        for i in range(self.maxthreads):
            t = Thread(target=self.worker)
            t.start()
            allts.append(t)

        self.q1.join()

        for i in range(self.maxthreads):
            self.q1.put(None)

        for t in allts:
            t.join()

        # check q2
        logging.info(self.q2.qsize())

        ret = []
        for j in range(self.q2.qsize()):
            i = self.q2.get()
            if i is None:
                break
            ret.append(i)

        return ret
Example #7
0
def downloads(urls, outputs=[], concurrency=cpu_count()):
	# queues used for thread synchronisation
	exit_queue = Queue(1)
	job_queue = Queue()
	result_queue = Queue()

	# create the download tasks and add them to the job queue
	outputs = [None for _ in urls] if not outputs else outputs
	for url, output in zip(urls, outputs):
		job_queue.put(Param(url, output))

	job_size = job_queue.qsize()
	works = []

	# create and start the worker threads
	concurrency = job_size if concurrency > job_size else concurrency
	for _ in range(concurrency):
		t = Worker(job_queue, result_queue, exit_queue)
		works.append(t)
		t.start()

	# check whether the work has finished; there are two cases:
	# 1. every task has been executed
	# 2. the user pressed ctrl+c, in which case we wait for the tasks that
	#    are already running to finish
	alive = True
	try:
		while alive:
			for work in works:
				if work.is_alive():
					alive = True
					break
			else:
				alive = False
			if result_queue.qsize() == job_size and exit_queue.qsize() == 0:
				exit_queue.put(1)
	except KeyboardInterrupt:
		logger.warning("ctrl + c is precessed!wait running task to complate..")
		exit_queue.put(1)
		for work in works:
			if work.is_alive():
				work.join()

	# collect and return the results
	results = []
	while job_queue.qsize() > 0:
		param = job_queue.get_nowait()
		results.append(Result(False, "task not excute", param.url))
	while result_queue.qsize() > 0:
		result = result_queue.get_nowait()
		results.append(result)
	return results
class Fetcher:
    def __init__(self, threads_num):
        self.opener = urllib.request.build_opener(urllib.request.HTTPHandler)
        self.lock = Lock()  # thread lock
        self.q_req = Queue()  # task queue
        self.q_ans = Queue()  # result queue
        self.threads_num = threads_num
        for i in range(threads_num):
            t = Thread(target=self.deal_task)
            t.daemon = True
            t.start()
        self.running = 0

    def __del__(self):  # on teardown, wait for both queues' tasks to finish
        time.sleep(0.5)
        self.q_req.join()
        self.q_ans.join()

    def task_left(self):
        return self.q_req.qsize() + self.q_ans.qsize() + self.running

    def push(self, task):
        self.q_req.put(task)

    def pop(self):
        return self.q_ans.get()

    def deal_task(self):
        while True:
            req = self.q_req.get()
            with self.lock:  # make the counter update atomic
                self.running += 1
            ans = self.get_data(req)
            self.q_ans.put(ans)
            with self.lock:
                self.running -= 1
            self.q_req.task_done()
            time.sleep(0.1)

    def get_data(self, req, retries=3):  # retry mechanism after a failure
        data = ''
        try:
            data = self.opener.open(req, timeout=10).read()  # 10 second timeout
        except urllib.request.URLError as e:
            if retries > 0:
                return self.get_data(req, retries - 1)
            print('GET Failed.', req)
            print(e.reason)
        return data
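A hedged usage sketch of this Fetcher, following the usual push/pop pattern (the URLs are placeholders): push requests, then keep popping answers while work is still outstanding.

f = Fetcher(threads_num=4)
for url in ['http://example.com/a', 'http://example.com/b']:
    f.push(url)
while f.task_left():
    data = f.pop()           # blocks until a worker has produced an answer
    print(len(data) if data else 'request failed')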
    def train(self, texts, chunksize=100, workers = 2):
        """
        Update the model's neural weights from a sequence of sentences (can be a once-only generator stream).
        Each sentence must be a list of utf8 strings.

        """
        if not training_methods_imported:
            raise NotImplementedError(err_msg)
        logger.info("training model with %i workers" % (workers))

        start, next_report = time.time(), [1.0]
        jobs = Queue(maxsize=2 * workers)  # buffer ahead only a limited number of jobs.. this is the reason we can't simply use ThreadPool :(
        lock = threading.Lock()  # for shared state (=number of words trained so far, log reports...)

        total_error = [0.0]
        objects_done = [0]

        def worker_train():
            """Train the model, lifting lists of sentences from the jobs queue."""
            observation_work = np.zeros(self.window * self.size + self.object_size, dtype = REAL)
            prediction_work = np.zeros(self.output_size, dtype = REAL)
            composition_work = np.zeros([max(self.output_size, self.window * self.size + self.object_size), self.window * self.size + self.object_size], dtype = REAL) if self.bilinear_form else None

            while True:
                job = jobs.get()
                if job is None:  # data finished, exit
                    break
                # how many words did we train on? out-of-vocabulary (unknown) words do not count
                error = sum(train_sentence_concatenation(self, sentence, object_index, softmax_target, sigmoid_target, self._alpha, prediction_work, observation_work, composition_work) for sentence, object_index, softmax_target, sigmoid_target in job)
                with lock:
                    total_error[0] += error
                    objects_done[0] += len(job)
                    elapsed = time.time() - start
                    if elapsed >= next_report[0]:
                        logger.info("PROGRESS: %s objects, %.0f objects/s" % (objects_done[0], float(objects_done[0]) / elapsed if elapsed else 0.0))
                        next_report[0] = elapsed + 1.0  # don't flood the log, wait at least a second between progress reports

        dynos = [threading.Thread(target=worker_train) for _ in range(0,workers)]
        for thread in dynos:
            thread.daemon = True  # make interrupting the process with ctrl+c easier
            thread.start()

        # convert input strings to Vocab objects (or None for OOV words), and start filling the jobs queue
        no_oov = ((np.array([self.vocab.get_index(word) for word in sentence], dtype = INT), object_index, softmax_target, sigmoid_target) for sentence, object_index, softmax_target, sigmoid_target in texts)
        for job_no, job in enumerate(grouper(no_oov, chunksize)):
            logger.debug("putting job #%i in the queue, qsize=%i" % (job_no, jobs.qsize()))
            jobs.put(job)
        logger.info("reached the end of input; waiting to finish %i outstanding jobs" % jobs.qsize())

        for _ in range(0,workers):
            jobs.put(None)  # give the workers heads up that they can finish -- no more work!

        for thread in dynos:
            thread.join()

        elapsed = time.time() - start
        logger.info("training on %i objects took %.1fs, %.0f words/s" %
            (objects_done[0], elapsed, objects_done[0] / elapsed if elapsed else 0.0))

        return (objects_done[0], total_error[0])
Example #10
class EntityQueue:
    def __init__(self, maxsize = 1000):
        self.queue = Queue(maxsize)
        self.enqueuing_flags = {}

    def put(self, item, block = True, timeout = None):
        self.queue.put(item, block, timeout=timeout)

    def get(self, block = True, timeout = None):
        return self.queue.get(block, timeout)

    def qsize(self):
        return self.queue.qsize()

    def empty(self):
        return self.queue.empty() and not self.is_enqueuing()

    def full(self):
        return self.queue.full()

    def add_enqueuing_flag(self, id):
        self.enqueuing_flags[id] = True

    def update_enqueuing_flag(self, id, state):
        self.enqueuing_flags[id] = state

    def is_enqueuing(self):
        # True only while every registered producer still has its flag set
        return all(self.enqueuing_flags.values())
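A hedged usage sketch: each producer registers a flag, clears it when it has finished, and consumers treat the queue as exhausted only when it is both empty and no producer is still enqueuing.

eq = EntityQueue(maxsize=10)
eq.add_enqueuing_flag('producer-1')             # register a producer
for item in range(3):
    eq.put(item)
eq.update_enqueuing_flag('producer-1', False)   # producer has finished
while not eq.empty():                           # empty() also checks the flags
    print(eq.get())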
Example #11
class WFOpenedClosedLists(SwapPolitic):
    def __init__(self, table):
        self.max_depth = 3
        self.table = table
        self.tries = 0

        self.openedList = Queue()
        self.closedList = []

        self.openedList.put((table, 0))

    def run(self):
        while self.openedList.qsize() != 0:
            (table, depth) = self.openedList.get()
            table.printState()
            if depth >= self.max_depth:
                continue

            self.tries += 1

            if table.isValid():
                self.table = table
                return

            ret = None
            for i in range(0, table.count - 1):
                tt = table.copy()
                tt.swapTwoAdjacent(i)
                self.openedList.put((tt, depth+1))

            self.closedList.append((table, depth))
Example #12
def start_testing(unique_info, test_continues):
    """Multithreaded testing of files fetched from database.
    Reports first buttons found. Composes list of unique files with
    specified extensions."""
    worker_threads = Queue(TESTING_THREADS_ALLOWED)
    print("starting ", TESTING_THREADS_ALLOWED, " threads\n")
    test_thread_id = 0
    all_files_tested = False
    while time.time() - db.time_of_update[0] < DB_WATCHDOG_TIME \
           and (not all_files_tested) :
        """ Spawn threads to fetch, test files and update database until
        all files uploaded to DB and tested or no changes happened to DB
        for DB_WATCHDOG_TIME seconds."""
        print(time.time() - db.time_of_update[0])
        if worker_threads.qsize() < TESTING_THREADS_ALLOWED:
            worker_threads.put(test_thread_id)
            worker = threading.Thread(target=tester.tester, \
                                      args=(worker_threads, \
                                      conn_data, \
                                      unique_info,
                                      EXTENTION_TO_FIND,
                                      ALLOWED_APP_RUNTIME
                                      ))
            worker.daemon = True
            worker.start()
            test_thread_id += 1
            time.sleep(0.01)
            if test_continues.qsize() < 2: #  tree composed and uploaded
                all_files_tested = db.check_test_completion(conn_data,
                                                        EXTENTION_TO_FIND)
    print ("Testing thread waiting for all worker-threads to complete\n")
    worker_threads.join()
    print ("Testing Thread Checked all unique ",EXTENTION_TO_FIND, " files\n")
    test_continues.get()
    test_continues.task_done()
Example #13
def test():
    cell_codes = [
        {"cid": 5465, "mcc": 460, "lac": 2, "mnc": 0},
        {"cid": 4198, "mcc": 460, "lac": 12, "mnc": 0},
        {"cid": 12175, "mcc": 460, "lac": 12, "mnc": 0},
        {"cid": 10, "mcc": 460, "lac": 17, "mnc": 0},
        {"cid": 8, "mcc": 460, "lac": 18, "mnc": 0},
        {"cid": 108186371, "mcc": 460, "lac": 23, "mnc": 0},
    ]

    q = Queue()

    for code in cell_codes:
        q.put(code)
        # print('qsize={}'.format(q.qsize()))
        while q.qsize() > 0:
            current_code = q.get()
            print('>>>>>>current_code={}'.format(current_code))
            d = spider(current_code)
            if isinstance(d, dict):
                print(d)
            elif d == 403 and d in ERR_CODES.keys():
                q.put(current_code)
                print('sleep {}s'.format(_sleep_time))
                time.sleep(_sleep_time)
Example #14
class Channel:
    def __init__(self, name: str, consumer: BrightsideConsumer) -> None:
        self._consumer = consumer
        self._name = ChannelName(name)
        self._queue = Queue()
        self._state = ChannelState.initialized

    def acknowledge(self, message: BrightsideMessage):
        self._consumer.acknowledge(message)

    @property
    def length(self) -> int:
        return self._queue.qsize()

    def name(self) -> ChannelName:
        return self._name

    def receive(self, timeout: int) -> BrightsideMessage:
        if self._state is ChannelState.initialized:
            self._state = ChannelState.started

        if not self._queue.empty():
            return self._queue.get(block=True, timeout=timeout)

        return self._consumer.receive(timeout=timeout)

    @property
    def state(self) -> ChannelState:
        return self._state

    def stop(self):
        self._queue.put(BrightsideMessageFactory.create_quit_message())
        self._state = ChannelState.stopping
Example #15
def loop(output, input, translator):
    q = Queue()
    input.input_queue = q
    input.start()

    dt = 0
    last_time = datetime.now()
    running = True
    while running:
        now = datetime.now()
        dt = (now - last_time).total_seconds()
        last_time = now

        while q.qsize() > 0:
            action = Action(*q.get())
            fn, args = translator.do(action)
            if fn == "halt":
                running = False
                for pin in translator.pins:
                    output.off(pin)
            else:
                getattr(output, fn)(*args)

        output.update(dt)

    output.close()
Example #16
def crawler(start_link = START_LINK , max_crawled = MAX_CRAWLED):
    links_viewed = []
    result = {}
    result['content'] = []
    linkqueue = Queue(0)
    linkqueue.put(start_link)
    counter = 0
    while counter < max_crawled and linkqueue.qsize() > 0:
        try:
            newLink = linkqueue.get()
            if newLink in links_viewed:
                continue
            links_viewed.append(newLink)
            new_result = pageParser(newLink)
        except Exception:
            print ("some error happend during fetching a link here")
            continue
        '''if isDuplicate(new_result['body'] , result['content']):
            continue'''
        result['content'].append(new_result)
        counter = counter + 1
        for i in range(0,len(new_result['links'])):
            linkqueue.put(LINK_PREFIX + new_result['links'][i])
            new_result['links'][i]  = LINK_PREFIX + new_result['links'][i]
    result = updateRanks(result)

    print (" finished fetching ",counter,"pages.")
    variable = json.dumps(result)
    with open(configs.JSON_DIR, 'w') as file:
        file.write(variable)
    return result
Example #17
def execute_jobs(jobs, show_progress=False, number_of_workers=10, debug_jobs=False):
    if debug_jobs:
        logger.setLevel(logging.DEBUG)

    results = []

    job_queue = Queue()
    result_queue = Queue()

    # create worker threads
    workers = []
    for _ in range(min(number_of_workers, len(jobs))):
        worker = Worker(job_queue, result_queue)
        workers.append(worker)

    # fill job_queue with jobs for each worker
    pending_jobs = list(jobs)
    running_job_paths = []
    while job_queue.qsize() < len(workers):
        job = pending_jobs.pop(0)
        running_job_paths.append(job['client'].path)
        logger.debug("started '%s'" % job['client'].path)
        job_queue.put(job)
    logger.debug('ongoing %s' % running_job_paths)

    # start all workers
    [w.start() for w in workers]

    # collect results
    while len(results) < len(jobs):
        (job, result) = result_queue.get()
        logger.debug("finished '%s'" % job['client'].path)
        running_job_paths.remove(result['job']['client'].path)
        if show_progress and len(jobs) > 1:
            if result['returncode'] == NotImplemented:
                sys.stdout.write('s')
            elif result['returncode']:
                sys.stdout.write('E')
            else:
                sys.stdout.write('.')
            if debug_jobs:
                sys.stdout.write('\n')
            sys.stdout.flush()
        result.update(job)
        results.append(result)
        if pending_jobs:
            job = pending_jobs.pop(0)
            running_job_paths.append(job['client'].path)
            logger.debug("started '%s'" % job['client'].path)
            job_queue.put(job)
        if running_job_paths:
            logger.debug('ongoing %s' % running_job_paths)
    if show_progress and len(jobs) > 1 and not debug_jobs:
        print('')  # finish progress line

    # join all workers
    for w in workers:
        w.done = True
    [w.join() for w in workers]
    return results
Example #18
def checksum_dir(src, local_manifest, num_threads=10):
    """
    Walk the source folder and compute the SHA512 of any files whose hashes
    are not already in the local manifest.

    Args:
        src: HDFS source folder to walk.
        local_manifest: local manifest file of known "<sha512> *<path>" lines.
        num_threads: number of checksum worker threads.

    Returns:
        None; new results are appended to the local manifest.
    """

    known = {}
    logger.info("Opening up %s" % local_manifest)
    with open(local_manifest,'r') as f:
        for line in f:
            (sha512,path) = line.split(' ',1)
            path = path.lstrip('*')
            path = path.rstrip()
            known[path] = sha512
    logger.info("Loaded %i known hashes." % len(known))

    client = hdfs.InsecureClient('http://hdfs.gtw.wa.bl.uk:14000', user='******')
    #client = hdfs.InsecureClient('http://dls.httpfs.wa.bl.uk:14000', user='******')

    wq = Queue()

    print("Scanning %s" % src)
    sames = 0
    misses = 0
    for (path, dirs, files) in client.walk(src):
        # Loop through the files:
        i = 0
        for file in files:
            srcpath = posixpath.join(path,file)
            if srcpath in known:
                logger.info("Hash for %s is already known." % srcpath)
                continue
            srcstatus = client.status(srcpath)
            srchash = client.checksum(srcpath)
            if len(srchash['bytes']) != 64 or srchash['bytes'] == bytearray(64):
                raise Exception("Got nonsense hash %s" % srchash)
            logger.info("Queueing %s" %(srcpath))
            wq.put(srcpath)
            break

    logger.info("Launching %i workers to process %i files..." % (num_threads, wq.qsize()) )
    results = []
    for i in range(num_threads):
        worker = Thread(target=generate_checksums, args=(wq, results))
        worker.daemon = True
        worker.start()

    # Wait for the queue to be processed:
    wq.join()

    logger.info("Appending results to %s" % local_manifest)
    for r in results:
        if r.hash:
            with open(local_manifest, 'a') as f:
                print("%s *%s" % (r.hash, r.path), file=f)
    def process_multi_threaded(self, process_queue):
        """
        Starts the multithreaded process of retrieving search data from the system
        """

        # First, make a list of threads to keep count
        threads = []
        # Make a results queue
        result_queue = Queue()

        # First, we need to loop over the result queue
        for i in range(process_queue.qsize()):
            # Make a new thread
            t = Thread(target=perform_search, args=(process_queue.get(), result_queue, ))
            threads.append(t)
            t.start()

        # Then, loop through the threads list and wait for all to end
        for i in range(len(threads)):
            # Join the threads
            threads[i].join()

        print("All processing threads finished")
        print(result_queue.qsize())

        # Then, return the queue
        return result_queue
class Kernel(Thread):
    def __init__(self):
        Thread.__init__(self)
        self.programsQueue = Queue()
        self.isFirstLoad = True
        self.shouldShutDown= False

    def initializeKernel(self, clock, programloader, scheduler):
        self.programLoader = programloader
        self.scheduler = scheduler
        self.clock = clock

    def load(self, program):
        # Sets a program that the program loader will load to the memory
        self.programsQueue.put_nowait(program)

    def run(self):
        Thread.run(self)

        while not self.shouldShutDown:
            if not self.programsQueue.qsize() == 0:
                program = self.programsQueue.get_nowait()
                self.isFirstLoad = len(self.programLoader.pcbTable.pcbs) == 0
                self.programLoader.load(program)
                if self.isFirstLoad:
                    self.scheduler.setNextPcbToCpu()
    def search_multithreaded(self, query_text, limit=0):
        """
        Performs the search in multithreaded manner
        Searching for 1-100, then 101-200, 201-300, etc...
        """

        # list to keep track of all the threads
        threads = []
        # Make the results queue
        result_queue = Queue()
        # Limit the query if wanted
        if limit == 0:
            limit = int(self._threads)

        # First, initialize all the threads
        for i in range(limit):
            t = Thread(target=create_and_perform_query, args=(query_text, result_queue, (i*100 + 1),))
            threads.append(t)
            t.start()

        # Then, loop through all the results, ordering to wait until all are done
        for i in range(len(threads)):
            threads[i].join()

        print("All search threads finished")
        print(result_queue.qsize())

        # Then, return the queue
        return result_queue
Example #22
def main():
    """ Function makes whole job.
    """
    queue = Queue()
    pages = (URL + str(i + 1) for i in range(44))

    t0 = time()

    with Pool(10) as p:
        for links in p.imap_unordered(get_links, pages):
            for link in links:
                queue.put(link)

    t1 = time()

    with Pool(20) as p:
        for entry in p.imap_unordered(get_entry, drain(queue)):
            queue.put(entry)

    t2 = time()

    print()
    print(t1 - t0)
    print('entries:', queue.qsize())
    print(t2 - t1)

    with open('data.json', 'w') as f:
        json.dump(queue2list(queue), f)
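drain and queue2list are helpers defined elsewhere in that project; hedged sketches of what they plausibly do (assumptions, not the original code):

def drain(queue):
    # Yield items until the queue is empty (assumed helper).
    while queue.qsize() > 0:
        yield queue.get()

def queue2list(queue):
    # Copy the remaining queue contents into a list (assumed helper).
    return list(drain(queue))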
class BlockingShellSocketChannel(ShellSocketChannel):

    def __init__(self, context, session, address=None):
        super(BlockingShellSocketChannel, self).__init__(context, session,
                                                        address)
        self._in_queue = Queue()

    def call_handlers(self, msg):
        #io.rprint('[[Shell]]', msg) # dbg
        self._in_queue.put(msg)

    def msg_ready(self):
        """Is there a message that has been received?"""
        if self._in_queue.qsize() == 0:
            return False
        else:
            return True

    def get_msg(self, block=True, timeout=None):
        """Get a message if there is one that is ready."""
        return self._in_queue.get(block, timeout)

    def get_msgs(self):
        """Get all messages that are currently ready."""
        msgs = []
        while True:
            try:
                msgs.append(self.get_msg(block=False))
            except Empty:
                break
        return msgs
Example #24
class TrapUnitContainer(FUnitContainer):
    def __init__(self, configuration, machine):
        super().__init__(configuration, machine)
        self.funits = [TrapUnit(machine) for i in range(self.numUnits)]
        self.trapQueue = Queue()

    def issue(self, instr):
        if instr.strOpcode not in self.instructions:
            return False
        if instr.funCode == 0:
            self.machine.haltIssued = True
            log("Halt issued.")
        if not self.hasOpenRStation():
            return False
        rStation = self.getOpenRStation()
        rStation.issue(instr)
        self.trapQueue.put(rStation)
        return True

    def execute(self):
        if not self.trapQueue.qsize(): return
        nextTrap = self.trapQueue.queue[0]
        if nextTrap.readyToExecute():
            log('{0} beginning execution.'.format(nextTrap.name))
            nextTrap.beginExecution()
        elif nextTrap.executing and nextTrap.execTime > 0:
            nextTrap.decreaseExecTime()
            log('{0} continuing execution. Time left: {1}'.format(nextTrap.name, nextTrap.execTime))
        elif nextTrap.execTime == 0 and not nextTrap.resultReady:
            log('{0} completing execution.'.format(nextTrap.name))
            nextTrap.computeResult()
            _ = self.trapQueue.get()
Example #25
   def monitor(self, core, mem):
      startTime = time.time()
      avgMH = Queue()
      while ((time.time() - startTime) < self.monitorTime):
         time.sleep(5)
         devInfo = self.api.getGPUInfo(self.device)
         if devInfo['Temperature'] >= self.maxTemp:        
            self.handleBadClocks('Temperature threshold reached.', devInfo)
            return True
         if devInfo['HWE'] > self.HWE:
            self.HWE = devInfo['HWE']
            self.handleBadClocks('Hardware errors found.', devInfo)
            #Make sure we give the GPU time to set the new clocks so we get the final HW error count
            time.sleep(2)
            devInfo = self.api.getGPUInfo(self.device)
            self.HWE = devInfo['HWE']
            return True

         avgMH.put(devInfo['MH'])
         if (avgMH.qsize() >= 3):
            avgMH.get()


      #MH added should be averaged
      totalMH = 0
      numMH = 0
      while (not avgMH.empty()):
         totalMH += avgMH.get()
         numMH += 1
      avg = totalMH/numMH
      newrec = {'device': self.device, 'core': core, 'mem': mem, 'success': True, 'MH': avg, 'temp': devInfo['Temperature']}
      self.results.append(newrec)
      self.logger.addRecord(newrec)
      return False
Example #26
def using_queue(values):
    queue = Queue()
    for x in values:
        queue.put(x)
    while queue.qsize() > 1:
        pair = (queue.get(), queue.get())
        queue.put(pair)
    return queue.get()
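For example, the function pairs items up left to right until a single nested tuple remains:

>>> using_queue([1, 2, 3, 4])
((1, 2), (3, 4))
>>> using_queue(['a', 'b', 'c'])
('c', ('a', 'b'))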
Example #27
class Open163Parser(object):
	"""Get course videoe URLs from www.open.163.com"""

	def __init__(self, courseURL):
		self.courseURL = courseURL
		self.resQ = Queue()

	def _parseSrtURL(self, srtURL):
		"""
		Take a matched bilingual subtitle URL and extract the separate
		Chinese and English subtitle URLs. If only the bilingual subtitles
		are needed, this method does not have to be called.
		"""
		encodedQuery = urlparse.urlsplit(srtURL).query
		# note the title of srt is encoded in GB2312.
		srtInfo = urlparse.parse_qs(encodedQuery, encoding='gb2312')
		srtInfo["cn-en"] = srtURL
		return srtInfo

	def getLectureURLs(self, courseURL):
		"""
		Extract the URL of every lecture from the course home page, for use
		with the Flvcd API.
		"""
		res = requests.get(courseURL)
		if res.status_code != 200:
			print("Unable to find course page.")
		else:
			doc = Soup(res.text, "html.parser")
			lectures = doc.find(id="list2").find_all("td", "u-ctitle")
			lectURLs = []
			for lect in lectures:
				lectURLs.append(lect.a["href"])
			print("Totally ", len(lectURLs), "lectures.")
			return lectURLs

	# multithreading methods below:

	def fillQ(self, queue):
		"""
		fill the queue for multithreading.
		"""
		lectURLs = self.getLectureURLs(self.courseURL)
		for url in lectURLs:
			queue.put(url)

	def putResult(self, videoInfo):
		"""
		put videoInfo into result queue.
		"""
		self.resQ.put(videoInfo)

	def getResult(self):
		"""
		return results.
		"""
		videoList = []
		while self.resQ.qsize() > 0:
			videoList.append(self.resQ.get())
		return tuple(videoList)
Example #28
    def test_run(self):
        """
        Test drain's run function (in the same thread)
        """
        source = range(5)
        destination = Queue()
        drain = Drain(source, destination)
        drain.run()
        assert destination.qsize() == 5
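Drain comes from the project under test; a minimal sketch consistent with this test (an assumption, not the real implementation) would simply copy the source into the destination queue:

class Drain:
    # Hypothetical stand-in: run() moves every item from an iterable source
    # into the destination queue.
    def __init__(self, source, destination):
        self.source = source
        self.destination = destination

    def run(self):
        for item in self.source:
            self.destination.put(item)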
Example #29
    def take_action(self, parsed_args):
        config = RawConfigParser()
        # FIXME: support running outside of the site root
        # FIXME: enforce mode 600!
        config.read([".simple-cloud-site.cfg"])

        source_dir = os.path.realpath(os.curdir)

        # TODO: allow provider configuration
        container_name = config.get("site", "container")

        logging.info('Publishing %s to %s', source_dir, container_name)

        driver, container = get_driver_instance(config, container_name)

        upload_queue = Queue()

        workers = [Thread(target=upload_worker, args=(i, upload_queue, config, container_name))
                   for i in range(8)]

        remote_objects = {i.name: i.hash for i in container.list_objects()}

        for f in find_files(source_dir):
            target_path = f.replace(source_dir, '').lstrip("/")

            # TODO: load ignore list from site config
            if target_path.endswith(".scss"):
                continue

            if target_path in remote_objects:
                # TODO: don't read the file twice:
                with open(f, "rb") as local_file:
                    h = md5()
                    for chunk in iter(lambda: local_file.read(8192), b''):
                        h.update(chunk)
                    if h.hexdigest() == remote_objects[target_path]:
                        continue

            mime_type, encoding = mimetypes.guess_type(f)
            if not mime_type:
                mime_type = 'application/octet-stream'
            upload_queue.put({'object_name': target_path, 'file_path': f, 'extra': {'content_type': mime_type}})

        logging.info('Waiting for %d uploads to complete…', upload_queue.qsize())

        for worker in workers:
            worker.daemon = True
            worker.start()

        upload_queue.join()

        logging.info('Configuring static site…')
        driver.ex_enable_static_website(container=container, index_file='index.html')
        driver.ex_set_error_page(container=container, file_name='error.html')
        driver.enable_container_cdn(container=container)

        print('CDN URL:', driver.get_container_cdn_url(container=container))
Example #30
class ConnectionPool(object):
    """
    Simple connection-caching pool implementation.

    ConnectionPool provides the simplest possible connection pooling,
    lazily creating new connections if needed as `borrow_connection' is
    called.  Connections are re-added to the pool by `return_connection',
    unless doing so would exceed the maximum pool size.

    Example usage:
    >>> pool = ConnectionPool("localhost", 9160, "Keyspace1")
    >>> conn = pool.borrow_connection()
    >>> conn.execute(...)
    >>> pool.return_connection(conn)
    """
    def __init__(self, hostname, port=9160, keyspace=None, username=None,
                 password=None, decoder=None, max_conns=25, max_idle=5,
                 eviction_delay=10000):
        self.hostname = hostname
        self.port = port
        self.keyspace = keyspace
        self.username = username
        self.password = password
        self.decoder = decoder
        self.max_conns = max_conns
        self.max_idle = max_idle
        self.eviction_delay = eviction_delay

        self.connections = Queue()
        self.connections.put(self.__create_connection())
        self.eviction = Eviction(self.connections,
                                 self.max_idle,
                                 self.eviction_delay)

    def __create_connection(self):
        return Connection(self.hostname,
                          port=self.port,
                          keyspace=self.keyspace,
                          username=self.username,
                          password=self.password,
                          decoder=self.decoder)

    def borrow_connection(self):
        try:
            connection = self.connections.get(block=False)
        except Empty:
            connection = self.__create_connection()
        return connection

    def return_connection(self, connection):
        if self.connections.qsize() > self.max_conns:
            connection.close()
            return
        if not connection.is_open():
            return
        self.connections.put(connection)
Example #31
def ana(request):
    '''
    res: [
        [0, 0, 0, 0, 0, 0, 0, 0], # day 1 court 4,5,6,7 18:00 / court 4,5,6,7 20:00
        [0, 0, 0, 0, 0, 0, 0, 0], # day 2 court 4,5,6,7 18:00 / court 4,5,6,7 20:00
        ...
    ]
    cal: [
        {
            "date": 1, # day 1
            "sticks": [0, 0, 0, 0, 0, 0, 0, 0] # just like res
            "colors": ["", "", "", "", "", "", "", ""] # color of each time slice which depends on the quantity of sticks
        },
        ...
    ]
    '''

    if 'month' in request.GET and request.GET['month']:
        requestMonth = int(request.GET['month'])
    else:
        requestMonth = (datetime.now() + timedelta(days=31)).month

    if 'year' in request.GET and request.GET['year']:
        requestYear = int(request.GET['year'])
    else:
        requestYear = (datetime.now() + timedelta(days=31)).year

    currentYear = datetime.now().year  ## for copyright year
    requestTime = datetime(requestYear, requestMonth, 1)
    requestDateS = datetime(requestTime.year, requestTime.month,
                            1).strftime("%Y-%m-%d")  ## yyyy-MM-dd
    requestDateE = datetime(
        requestTime.year, requestTime.month,
        calendar.monthrange(requestTime.year, requestTime.month)[1]).strftime(
            "%Y-%m-%d")  ## yyyy-MM-dd
    monthList = [i for i in range(1, 13)]
    monthselect = [""] * 12
    monthselect[requestMonth - 1] = "selected"

    ## calendar info
    weekdayS = calendar.monthrange(requestTime.year, requestTime.month)[0]
    days = calendar.monthrange(requestTime.year, requestTime.month)[1]
    weeks = math.ceil((weekdayS + days) / 7)

    res = [[0] * 8 for i in range(days)]
    isDrawn = True
    q = Queue()
    threads = []

    for court in requestvenueId:
        key = {
            'rentDateS': requestDateS,
            'rentDateE': requestDateE,
            'venueId': court
        }
        t = threading.Thread(target=threadAna, args=(q, days, key))
        t.start()
        threads.append(t)

    for thread in threads:
        thread.join()

    for _ in range(q.qsize()):
        data = q.get()
        for i in range(days):
            res[i][data['court'] - 4] = data['res'][i][0]
            res[i][data['court']] = data['res'][i][1]
            isDrawn &= data['isDrawn']

    ## calendar
    cal = [[{
        "date": 0,
        "sticks": [],
        "colors": [],
        "colColor": False
    } for _ in range(7)] for _ in range(weeks)]
    for i in range(1, days + 1):
        color = res[i - 1].copy()
        for j in range(8):
            if color[j] == 0: color[j] = ""
            elif color[j] < 5: color[j] = "green"
            elif color[j] < 10: color[j] = "orange"
            else: color[j] = "red"
        cal[(i + weekdayS - 1) // 7][(i + weekdayS - 1) % 7] = {
            "date": i,
            "sticks": res[i - 1],
            "colors": color,
            "colColor": (i + weekdayS - 1) % 7 in [0, 4]
        }

    return render(request, 'home/ana.html', locals())
Example #32
class Takeover(Module):
    """
    OneForAll multithreaded subdomain takeover risk check module

    Example:
        python3 takeover.py --target www.example.com  --format csv run
        python3 takeover.py --target ./subdomains.txt --thread 10 run

    Note:
        The format parameter accepts 'txt', 'rst', 'csv', 'tsv', 'json',
        'yaml', 'html', 'jira', 'xls', 'xlsx', 'dbf', 'latex', 'ods'
        The path parameter defaults to None, which generates the path from
        the OneForAll results directory

    :param any target:  a single subdomain, or the path of a file with one subdomain per line (required)
    :param int thread:  number of threads (default 100)
    :param str format:  export format (default csv)
    :param str path:    export path (default None)
    """
    def __init__(self, target, thread=100, path=None, format='csv'):
        Module.__init__(self)
        self.subdomains = set()
        self.module = 'Check'
        self.source = 'Takeover'
        self.target = target
        self.thread = thread
        self.path = path
        self.format = format
        self.fingerprints = None
        self.subdomainq = Queue()
        self.cnames = list()
        self.results = Dataset()

    def save(self):
        logger.log('DEBUG', 'Saving check results')
        if self.format == 'txt':
            data = str(self.results)
        else:
            data = self.results.export(self.format)
        utils.save_data(self.path, data)

    def compare(self, subdomain, cname, responses):
        domain_resp = self.get('http://' + subdomain, check=False)
        cname_resp = self.get('http://' + cname, check=False)
        if domain_resp is None or cname_resp is None:
            return

        for resp in responses:
            if resp in domain_resp.text and resp in cname_resp.text:
                logger.log('ALERT', f'{subdomain} is at risk of subdomain takeover')
                self.results.append([subdomain, cname])
                break

    def worker(self, subdomain):
        cname = get_cname(subdomain)
        if cname is None:
            return
        maindomain = get_maindomain(cname)
        for fingerprint in self.fingerprints:
            cnames = fingerprint.get('cname')
            if maindomain not in cnames:
                continue
            responses = fingerprint.get('response')
            self.compare(subdomain, cname, responses)

    def check(self):
        while not self.subdomainq.empty():  # let the thread exit once the domain queue is drained
            subdomain = self.subdomainq.get()  # take a domain from the queue
            self.worker(subdomain)
            self.subdomainq.task_done()

    def progress(self):
        # set up the progress bar
        bar = tqdm()
        bar.total = len(self.subdomains)
        bar.desc = 'Progress'
        bar.ncols = 60
        while True:
            done = bar.total - self.subdomainq.qsize()
            bar.n = done
            bar.update()
            if done == bar.total:  # exit once every subdomain in the queue has been checked
                break
        # bar.close()

    def run(self):
        start = time.time()
        logger.log('INFOR', f'Starting the {self.source} module')
        self.subdomains = utils.get_domains(self.target)
        self.format = utils.check_format(self.format, len(self.subdomains))
        timestamp = utils.get_timestamp()
        name = f'all_subdomain_{timestamp}'
        self.path = utils.check_path(self.path, name, self.format)
        if self.subdomains:
            logger.log('INFOR', 'Checking for subdomain takeover risk')
            self.fingerprints = get_fingerprint()
            self.results.headers = ['subdomain', 'cname']
            # build the queue of subdomains to check
            for domain in self.subdomains:
                self.subdomainq.put(domain)
            # checker threads
            for _ in range(self.thread):
                check_thread = Thread(target=self.check, daemon=True)
                check_thread.start()
            # progress thread
            progress_thread = Thread(target=self.progress, daemon=True)
            progress_thread.start()

            self.subdomainq.join()
            self.save()
        else:
            logger.log('FATAL', 'Failed to get domains')
        end = time.time()
        elapsed = round(end - start, 1)
        logger.log(
            'INFOR', f'The {self.source} module took {elapsed} seconds and '
            f'found {len(self.results)} subdomains at risk of takeover')
        logger.log('DEBUG', f'Finished the {self.source} module')
Example #33
class QAPIWorker(object):
    """QAPI Worker is one IoticAgent instance
    """
    def __init__(self,
                 details,
                 managerStop,
                 keepFeeddata=DATA_KEEP,
                 keepControlreq=DATA_KEEP,
                 keepUnsolicited=DATA_KEEP,
                 sleepOnIdle=SLEEP_ON_IDLE):
        #
        self.__details = details
        self.__stop = Event()
        self.__managerStop = managerStop

        self.__keepFeeddata = 0
        try:
            self.__keepFeeddata = int(keepFeeddata)
        except Exception:
            logger.warning("QAPIWorker failed to int keepFeeddata '%s'",
                           keepFeeddata)

        self.__keepControlreq = 0
        try:
            self.__keepControlreq = int(keepControlreq)
        except Exception:
            pass

        self.__keepUnsolicited = 0
        try:
            self.__keepUnsolicited = int(keepUnsolicited)
        except Exception:
            pass
        #
        self.__qFeeddata = Queue()
        self.__qControlreq = Queue()
        self.__qUnsolicited = Queue()
        #
        self.__thread = None
        self.__qc = None  # IoticAgent.Core.Client instance
        #
        self.__sleep_on_idle = sleepOnIdle
        self.__qc_last = 0  # Last time qc was used for a request
        self.__qc_running = False  # QC is sleeping? (started (True) or stopped (False))
        #
        self.__polltime = 5
        self.__dead_max = 6  # How many times to try to start a dead worker?
        self.__dead_sleep = 5  # How long to sleep between worker.start attempts?
        #
        # IoticAgent Core throttle settings
        self.__queue_size = 128
        if 'queue_size' in details:
            self.__queue_size = details['queue_size']
        self.__throttle_conf = '540/30,1890/300'
        if 'throttle' in details:
            self.__throttle_conf = details['throttle']
        #
        self.__vhost = 'container1'
        if 'vhost' in details:
            self.__vhost = details['vhost']
        #
        self.__sslca = None
        if 'sslca' in details:
            self.__sslca = details['sslca']
        #
        self.__prefix = ''
        if 'prefix' in details:
            self.__prefix = details['prefix']

    def check_details(self, details):
        for ek in details:
            if ek in self.__details:
                if self.__details[ek] != details[ek]:
                    return False
        return True

    def check_authtoken(self, authToken):
        if 'authtokens' in self.__details:
            tokens = self.__details['authtokens'].strip().split("\n")
            for et in tokens:
                if et == authToken:
                    return True
        elif authToken == self.__details[
                'token']:  # todo: Allow Agent token as API Key ??
            return True
        return False

    def start(self):
        self.__thread = Thread(target=self.__run)
        self.__thread.start()

    def stop(self):
        self.__stop.set()
        self.__thread.join()

    def __wake_agent(self):
        if not self.__qc_running:
            logger.info("QAPIWorker %s Waking", self.__details['epid'])
            self.__qc.start()
            self.__qc_running = True
        self.__qc_last = monotonic()

    @property
    def default_lang(self):
        return self.__qc.default_lang

    def request_entity_create(self, lid, tepid=None):
        self.__wake_agent()
        return self.__qc.request_entity_create(lid, epId=tepid)

    def request_entity_rename(self, lid, newlid):
        self.__wake_agent()
        return self.__qc.request_entity_rename(lid, newlid)

    def request_entity_reassign(self, lid, nepid=None):
        self.__wake_agent()
        return self.__qc.request_entity_reassign(lid, nepid)

    def request_entity_delete(self, lid):
        self.__wake_agent()
        return self.__qc.request_entity_delete(lid)

    def request_entity_list(self, limit=500, offset=0):
        self.__wake_agent()
        return self.__qc.request_entity_list(limit=limit, offset=offset)

    def request_entity_list_all(self, limit=500, offset=0):
        self.__wake_agent()
        return self.__qc.request_entity_list_all(limit=limit, offset=offset)

    def request_entity_meta_get(self, lid, fmt="n3"):
        self.__wake_agent()
        return self.__qc.request_entity_meta_get(lid, fmt=fmt)

    def request_entity_meta_set(self, lid, meta, fmt="n3"):
        self.__wake_agent()
        return self.__qc.request_entity_meta_set(lid, meta, fmt=fmt)

    def request_entity_meta_setpublic(self, lid, public=True):
        self.__wake_agent()
        return self.__qc.request_entity_meta_setpublic(lid, public=public)

    def request_entity_tag_update(self, lid, tags, delete=False):
        self.__wake_agent()
        return self.__qc.request_entity_tag_update(lid, tags, delete=delete)

    def request_entity_tag_list(self, lid, limit=100, offset=0):
        self.__wake_agent()
        return self.__qc.request_entity_tag_list(lid,
                                                 limit=limit,
                                                 offset=offset)

    def request_point_create(self,
                             foc,
                             lid,
                             pid,
                             control_cb=None,
                             save_recent=0):
        self.__wake_agent()
        return self.__qc.request_point_create(foc,
                                              lid,
                                              pid,
                                              control_cb=control_cb,
                                              save_recent=save_recent)

    def request_point_rename(self, foc, lid, pid, newpid):
        self.__wake_agent()
        return self.__qc.request_point_rename(foc, lid, pid, newpid)

    def request_point_confirm_tell(self,
                                   foc,
                                   lid,
                                   pid,
                                   success=True,
                                   requestId=None):
        self.__wake_agent()
        return self.__qc.request_point_confirm_tell(foc,
                                                    lid,
                                                    pid,
                                                    success=success,
                                                    requestId=requestId)

    def request_point_delete(self, foc, lid, pid):
        self.__wake_agent()
        return self.__qc.request_point_delete(foc, lid, pid)

    def request_point_list(self, foc, lid, limit=500, offset=0):
        self.__wake_agent()
        return self.__qc.request_point_list(foc,
                                            lid,
                                            limit=limit,
                                            offset=offset)

    def request_point_list_detailed(self, foc, lid, pid):
        self.__wake_agent()
        return self.__qc.request_point_list_detailed(foc, lid, pid)

    def request_point_meta_get(self, foc, lid, pid, fmt="n3"):
        self.__wake_agent()
        return self.__qc.request_point_meta_get(foc, lid, pid, fmt=fmt)

    def request_point_meta_set(self, foc, lid, pid, meta, fmt="n3"):
        self.__wake_agent()
        return self.__qc.request_point_meta_set(foc, lid, pid, meta, fmt=fmt)

    def request_point_value_create(self,
                                   lid,
                                   pid,
                                   foc,
                                   label,
                                   vtype,
                                   lang=None,
                                   comment=None,
                                   unit=None):
        self.__wake_agent()
        return self.__qc.request_point_value_create(lid,
                                                    pid,
                                                    foc,
                                                    label,
                                                    vtype,
                                                    lang=lang,
                                                    comment=comment,
                                                    unit=unit)

    def request_point_value_delete(self, lid, pid, foc, label=None):
        self.__wake_agent()
        return self.__qc.request_point_value_delete(lid, pid, foc, label=label)

    def request_point_value_list(self, lid, pid, foc, limit=500, offset=0):
        self.__wake_agent()
        return self.__qc.request_point_value_list(lid,
                                                  pid,
                                                  foc,
                                                  limit=limit,
                                                  offset=offset)

    def request_point_tag_update(self, foc, lid, pid, tags, delete=False):
        self.__wake_agent()
        return self.__qc.request_point_tag_update(foc,
                                                  lid,
                                                  pid,
                                                  tags,
                                                  delete=delete)

    def request_point_tag_list(self, foc, lid, pid, limit=500, offset=0):
        self.__wake_agent()
        return self.__qc.request_point_tag_list(foc,
                                                lid,
                                                pid,
                                                limit=limit,
                                                offset=offset)

    def request_sub_create(self, lid, foc, gpid, callback=None):
        self.__wake_agent()
        return self.__qc.request_sub_create(lid, foc, gpid, callback=callback)

    def request_sub_create_local(self, slid, foc, lid, pid, callback=None):
        self.__wake_agent()
        return self.__qc.request_sub_create_local(slid,
                                                  foc,
                                                  lid,
                                                  pid,
                                                  callback=callback)

    def request_point_share(self, lid, pid, data, mime=None):
        self.__wake_agent()
        return self.__qc.request_point_share(lid, pid, data, mime=mime)

    def request_sub_ask(self, sub_id, data, mime=None):
        self.__wake_agent()
        return self.__qc.request_sub_ask(sub_id, data, mime=mime)

    def request_sub_tell(self, sub_id, data, timeout, mime=None):
        self.__wake_agent()
        return self.__qc.request_sub_tell(sub_id, data, timeout, mime=mime)

    def request_sub_delete(self, sub_id):
        self.__wake_agent()
        return self.__qc.request_sub_delete(sub_id)

    def request_sub_list(self, lid, limit=500, offset=0):
        self.__wake_agent()
        return self.__qc.request_sub_list(lid, limit=limit, offset=offset)

    def request_sub_recent(self, sub_id, count=None):
        self.__wake_agent()
        return self.__qc.request_sub_recent(sub_id, count=count)

    def request_search(self,
                       text=None,
                       lang=None,
                       location=None,
                       unit=None,
                       limit=100,
                       offset=0,
                       type_='full',
                       scope=IoticAgentCore.Const.SearchScope.PUBLIC):
        self.__wake_agent()
        return self.__qc.request_search(text=text,
                                        lang=lang,
                                        location=location,
                                        unit=unit,
                                        limit=limit,
                                        offset=offset,
                                        type_=type_,
                                        scope=scope)

    def request_describe(self,
                         guid,
                         scope=IoticAgentCore.Const.DescribeScope.AUTO):
        self.__wake_agent()
        return self.__qc.request_describe(guid, scope=scope)

    def __cb_feeddata(self, data):
        if self.__keepFeeddata == 0:
            return
        while self.__qFeeddata.qsize() > self.__keepFeeddata:
            self.__qFeeddata.get()  # throw away element
        self.__qFeeddata.put(data)

    def get_feeddata(self):
        ret = []
        while self.__qFeeddata.qsize() > 0:
            ret.append(self.__qFeeddata.get())
        return ret

    def __cb_controlreq(self, data):
        if self.__keepControlreq == 0:
            return
        while self.__qControlreq.qsize() > self.__keepControlreq:
            self.__qControlreq.get()  # throw away element
        self.__qControlreq.put(data)

    def get_controlreq(self):
        ret = []
        while self.__qControlreq.qsize() > 0:
            ret.append(self.__qControlreq.get())
        return ret

    def __cb_unsolicited(self, data):
        if self.__keepUnsolicited == 0:
            return
        while self.__qUnsolicited.qsize() > self.__keepUnsolicited:
            self.__qUnsolicited.get()  # throw away element
        self.__qUnsolicited.put(data)

    def get_unsolicited(self):
        ret = []
        while self.__qUnsolicited.qsize() > 0:
            ret.append(self.__qUnsolicited.get())
        return ret

    def __run(self):
        #
        self.__qc = IoticAgentCore.Client(self.__details['host'],
                                          self.__vhost,
                                          self.__details['epid'],
                                          self.__details['passwd'],
                                          self.__details['token'],
                                          prefix=self.__prefix,
                                          sslca=self.__sslca,
                                          send_queue_size=self.__queue_size,
                                          throttle_conf=self.__throttle_conf)
        # network_retry_timeout=10,  # todo: override in config ?
        #
        # Keep trying to start the worker
        dead_count = 0
        done = False
        while not done and not self.__stop.is_set() and\
                not self.__managerStop.is_set() and\
                dead_count < self.__dead_max:
            done = True
            try:
                self.__wake_agent()
            except:
                logger.error("Worker %s FAILED TO START sleep(%i)...",
                             self.__details['epid'], self.__dead_sleep)
                done = False
                dead_count += 1
                sleep(self.__dead_sleep)
        if dead_count >= self.__dead_max:
            return
        #
        self.__qc.register_callback_feeddata(self.__cb_feeddata)
        self.__qc.register_callback_controlreq(self.__cb_controlreq)
        self.__qc.register_callback_reassigned(self.__cb_unsolicited)
        self.__qc.register_callback_subscription(self.__cb_unsolicited)
        #
        while not self.__stop.is_set() and not self.__managerStop.is_set():
            logger.debug("QAPIWorker %s still running", self.__details['epid'])
            self.__stop.wait(self.__polltime)
            #
            if self.__qc_running and (monotonic() - self.__qc_last >
                                      self.__sleep_on_idle):
                logger.info("QAPIWorker %s Sleeping", self.__details['epid'])
                self.__qc.stop()
                self.__qc_running = False
        # Clean-up
        try:
            self.__qc.stop()
        except:
            pass
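# A minimal, self-contained sketch of the sleep-on-idle pattern that __run()
# implements above: a supervisor loop stops an underlying client once no
# request has arrived for `sleep_on_idle` seconds, and wake() restarts it on
# demand before each request. The names here (IdleSupervisor, client.start/stop)
# are illustrative assumptions, not part of the Iotic agent API.
from threading import Event, Lock
from time import monotonic


class IdleSupervisor:
    def __init__(self, client, sleep_on_idle=300, polltime=10):
        self.client = client            # any object with start()/stop()
        self.sleep_on_idle = sleep_on_idle
        self.polltime = polltime
        self.stop_evt = Event()
        self.lock = Lock()
        self.running = False
        self.last = monotonic()

    def wake(self):
        # call before every request: (re)start the client and reset the idle timer
        with self.lock:
            if not self.running:
                self.client.start()
                self.running = True
            self.last = monotonic()

    def run(self):
        # supervisor loop: put the client to sleep after a period of inactivity
        while not self.stop_evt.is_set():
            self.stop_evt.wait(self.polltime)
            if self.running and monotonic() - self.last > self.sleep_on_idle:
                self.client.stop()
                self.running = False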
Example #34
0
def crawl(seeds,
          username,
          password,
          site_name,
          config,
          outf=None,
          dout=None,
          ngout=None):
    'Crawl CDP/LLDP Neighbors to build a topology'

    # Queue for devices to scrape next
    q = Queue()

    # Queue for neighbor output from threads
    out_q = Queue()

    # Visited list for loop detection
    visited = list()

    # All Neighbor Entries
    neighbors = list()

    # Device entries for connection details (ipv4, os etc)
    devices = dict()

    # Thread tracking
    qtrack = dict()

    # Thread previous join attempts
    joined = list()

    # Distance tracking
    distances = dict()

    # Counter
    crawl_count = 0
    iteration_count = 0

    # Queue up seed devices
    for s in seeds:
        q.put(s)
        devices[s] = dict()
        devices[s]['remote_device_id'] = s
        devices[s]['ipv4'] = s
        devices[s]['os'] = config['main']['seed_os']
        devices[s]['platform'] = 'Unknown'
        devices[s]['logged_in'] = True
        distances[s] = 0

    # Outer Queue, starts inner queue and then adds all unvisited neighbors to queue when
    # inner queue is empty. Each iteration of outer queue visits all next level neighbors
    # at once inside inner queue via threads.
    while not q.empty():
        iteration_count += 1
        cqsize = q.qsize()
        if not config['main']['quiet']:
            if (int(config['main']['log_level']) >= logging.WARNING
                    and iteration_count > 1):
                pbar = tqdm(total=cqsize, unit='dev')
                pbar.set_description('Iteration %s' % str(iteration_count))

        # Launch threads on everything in queue to scrape
        while not q.empty():
            current = q.get()
            qsize = q.qsize()

            # Progress bar on warning level or above
            if not config['main']['quiet']:
                if (int(config['main']['log_level']) >= logging.WARNING
                        and iteration_count > 1):
                    p_int = (cqsize - qsize)
                    pbar.update(1)
                    print('\r', end='')

            if crawl_count > int(config['main']['max_crawl']):
                logger.warning('Max Devices allowed already crawled')

            # Only scrape unvisited devices
            elif current not in visited:
                crawl_count += 1

                visited.append(current)
                while threading.active_count() > int(
                        config['main']['thread_count']):
                    qsize = q.qsize()
                    logger.debug('Waiting for free thread - Q Size: %s',
                                 str(qsize))
                    sleep(1)
                # Throttle connections
                sleep(0.1)
                logger.info('Processing %s', current)

                # Start thread to scrape devices
                nd_thread = threading.Thread(target=gather_nd, \
                    kwargs={"device": devices[current], "username": username, \
                            "password": password, "out_q": out_q, \
                            "qtrack": qtrack})
                nd_thread.start()

        # Join all threads from last iteration and warn if problems joining threads
        logger.info('Joining all active threads')
        main_thread = threading.current_thread()
        wait_timer = 15
        for some_thread in threading.enumerate():
            if some_thread != main_thread:
                tid = str(some_thread.ident)
                if tid in qtrack:
                    tid = qtrack[tid]
                if tid not in joined:
                    joined.append(tid)
                    logger.debug('Joining Thread: %s', tid)
                    some_thread.join(timeout=wait_timer)
                    wait_timer = 1
                else:
                    logger.info('Thread running long time, ignoring: %s: %s',
                                tid, str(some_thread))

        # Process output queue of neighbor data and look for new neighbors to visit
        logger.info('Processing output queue')
        while not out_q.empty():
            nd = out_q.get()

            # Gather distance info
            for n in nd:
                if n['local_device_id'] not in distances:
                    distances[n['local_device_id']] = 100
                if n['remote_device_id'] in distances:
                    if distances[n['local_device_id']] > (
                            distances[n['remote_device_id']] + 1):
                        distances[n['local_device_id']] = distances[
                            n['remote_device_id']] + 1
                        logger.info('Found new distances on %s: %s', n['local_device_id'], \
                                    str(distances[n['remote_device_id']] + 1))

            # Save all neighbor data
            for n in nd:
                n['distance'] = distances[n['local_device_id']]
                neighbors.append(n)
                rname = n['remote_device_id']

                # Save device to devices
                if rname not in devices:
                    devices[rname] = n
                # Update unknown devices, restore logged_in variable
                elif devices[rname]['platform'] == 'Unknown':
                    logged_in = False
                    if 'logged_in' in devices[rname]:
                        logged_in = devices[rname]['logged_in']
                    devices[rname] = n
                    devices[rname]['logged_in'] = logged_in

                # Save logged_in as False initially, update on another pass
                if 'logged_in' not in devices[n['local_device_id']]:
                    devices[n['local_device_id']]['logged_in'] = False

                # Local device always was logged in to
                devices[n['local_device_id']]['logged_in'] = True
                logger.info('Processing Out_q entry %s on %s', rname,
                            n['local_device_id'])

                # New Neighbor that has not been scraped, only scrape IOS/NXOS for now
                if rname not in visited:
                    if n['os'] in ('cisco_nxos', 'cisco_ios'):
                        if rname not in q.queue:
                            q.put(rname)
                    else:
                        visited.append(rname)
                else:
                    logger.debug('Already visited %s', rname)

    # Count Neighbors
    ncount = len(neighbors)
    logger.info('Total neighbors: %s', str(ncount))

    output.output_files(outf, ngout, dout, neighbors, devices, distances,
                        site_name, config)
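# A minimal, self-contained sketch of the level-by-level crawl pattern used by
# crawl() above (outer loop per BFS level, one thread per device, join, then
# drain the output queue for newly discovered neighbors). The fake
# gather_neighbors() is illustrative only; it stands in for the real gather_nd()
# device scrape.
import threading
from queue import Queue

def gather_neighbors(device, out_q):
    # placeholder: a real implementation would log in and scrape CDP/LLDP
    fake_topology = {'seed1': ['sw1', 'sw2'], 'sw1': ['sw3'], 'sw2': [], 'sw3': []}
    out_q.put(fake_topology.get(device, []))

def crawl_levels(seeds):
    q, out_q, visited = Queue(), Queue(), set()
    for s in seeds:
        q.put(s)
    while not q.empty():
        threads = []
        # inner loop: scrape every device of the current level in parallel
        while not q.empty():
            device = q.get()
            if device in visited:
                continue
            visited.add(device)
            t = threading.Thread(target=gather_neighbors, args=(device, out_q))
            t.start()
            threads.append(t)
        for t in threads:
            t.join()
        # outer loop: queue up unvisited neighbors for the next level
        while not out_q.empty():
            for neighbor in out_q.get():
                if neighbor not in visited:
                    q.put(neighbor)
    return visited

# crawl_levels(['seed1']) -> {'seed1', 'sw1', 'sw2', 'sw3'}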
Example #35
0
imgs_folder = 'imgs'
dataset = 'controls_popups_dataset.jsonl'

# clear results file
open(dataset, 'w').close()

# create image folder if not exists
if not os.path.exists(imgs_folder):
    os.makedirs(imgs_folder)

# Delete all .png files in directory
old_files = [f for f in os.listdir(imgs_folder) if f.endswith(".png")]
for file in old_files:
    os.remove(os.path.join(imgs_folder, file))
    
num_threads = 8


for _ in range(num_threads):
    extractor = ControlsExtractor(imgs_folder, dataset, queue)
    t = threading.Thread(target=extractor.start)
    t.daemon = True
    t.start()

while not queue.empty():
    print('queue size: ', queue.qsize())
    time.sleep(60)
    
queue.join()
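# Note: `queue` and `ControlsExtractor` are assumed to be defined earlier in the
# original script. A minimal, self-contained version of the same daemon-worker
# pattern (fill a Queue, start daemon threads, poll, then join) looks roughly
# like this; the process_item() helper is illustrative only.
import queue as queue_mod
import threading
import time

work_q = queue_mod.Queue()
for item in range(100):
    work_q.put(item)

def process_item():
    while True:
        item = work_q.get()
        # ... do the real extraction work here ...
        work_q.task_done()   # required for work_q.join() to return

for _ in range(8):
    t = threading.Thread(target=process_item, daemon=True)
    t.start()

while not work_q.empty():
    print('queue size:', work_q.qsize())
    time.sleep(1)

work_q.join()   # blocks until every queued item has been marked task_done()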

Example #36
0
class Job51Engine(AbstractEngine):
    """51job 爬虫"""

    def __init__(self, **kwargs):
        self.name = kwargs['name']
        self.rawurl = kwargs['url']
        self.keywords =  "%2520" if kwargs['keywords'] == "" else kwargs['keywords']
        self.pagenum = kwargs['pagenum']

        self.url = self.rawurl.format(keywords=self.keywords, pagenum=self.pagenum)        
        self.hrefQueue = Queue(maxsize=5000)
        self.jobinfothreads = []
        self.joblisthreads = []
        self.totalpage = 0
        self.connection = connect(
            host='localhost',
            user='******',
            password='******',
            db='jobinfo',
            charset='utf8'
        )
        self.cursor = self.connection.cursor()
        self.templatesql = "INSERT INTO jobinfo (jid ,jurl ,jpost ,jsalary ,jcompany ,jsummary ,jpostinfo ,jconcatinfo ,jsource) VALUES ( '{jid}','{jurl}','{jpost}','{jsalary}','{jcompany}','{jsummary}', '{jpostinfo}','{jconcatinfo}','{jsource}' )"
    
    def execute(self):
        print("51job 爬虫启动...")
        indexpage = self._gethtmlpage(url=self.url, encoding='gbk')
        # 获取总页数
        self.totalpage = indexpage.select("#hidTotalPage")[0].attrs['value']
        # self.totalpage = 1
        self.__pagenum = self.genpagenum(int(self.totalpage))
            
        self._inithreads(self.joblisthreads, self._getjoblist)
        self._starthreads(self.joblisthreads)
        self._jointhreads(self.joblisthreads)

        print(self.hrefQueue.qsize())

        self._inithreads(self.jobinfothreads, self._getJobInfo)
        self._starthreads(self.jobinfothreads)
        self._jointhreads(self.jobinfothreads)

        print("51job 爬虫结束...")

    def _gethtmlpage(self, url, encoding='utf-8'):
        """
            通过url获取列表页面
            返回 BeautifulSoup 对象
        """
        response = requests.get(url)
        if 200 != response.status_code:
            BeautifulSoup("", 'lxml')
        response.encoding = encoding
        return BeautifulSoup(response.text, 'lxml')

    def _getJobInfo(self):
        """获取职位详情信息"""
        while True:
            if self.hrefQueue.empty():
                break
            jobinfourl = self.hrefQueue.get()
            # print("thread name {} : get href is {}".format(threading.currentThread().getName() ,jobinfourl))
            infopage = self._gethtmlpage(jobinfourl, 'gbk')
            try:
                company_center = infopage.select("div.tCompany_center.clearfix")[0]
                header = company_center.select("div.tHeader.tHjob")[0]
                company_main = company_center.select("div.tCompany_main")[0]
            except IndexError as identifier:
                break
            
            tBorderTop_boxs = company_main.select("div.tBorderTop_box")

            # job description section
            postInfo = tBorderTop_boxs[0]
            # contact information section
            contactInfo = tBorderTop_boxs[1]

            # print(("=" * 30) + ">")
            # print("url: " + jobinfourl)
            # print("职位: " + header.select("div.cn h1")[0].attrs['title'])
            # print("薪资: " + header.select("div.cn strong")[0].text)
            # print("公司: " + header.select("div.cn p.cname a.catn")[0].attrs['title'])
            # print("摘要信息: " + header.select("div.cn p.msg.ltype")[0].attrs['title'])
            # print("职位信息: " + self._pinjiezhiweixinxi(postInfo.select("div.bmsg.job_msg.inbox > p")))
            # print("联系方式: " + contactInfo.select("div.bmsg.inbox > p.fp")[0].text)
            # print(("=" * 30) + "<")

            # job = JobModel()
            # job.juid = uuid()
            # job.url = jobinfourl
            # job.post = header.select("div.cn h1")[0].attrs['title']
            # job.salary = header.select("div.cn strong")[0].text
            # job.company = header.select("div.cn p.cname a.catn")[0].attrs['title']
            # job.summary = header.select("div.cn p.msg.ltype")[0].attrs['title']
            # job.postinfo = self._pinjiezhiweixinxi(postInfo.select("div.bmsg.job_msg.inbox > p"))
            # job.concatinfo = contactInfo.select("div.bmsg.inbox > p.fp")[0].text
            # job.source = 'http://www.51job.com'
            tempsql = self.templatesql.format(
                jid            = my_uuid(),
                jurl           = jobinfourl,
                jpost          = self._getjpost(header),
                jsalary        = self._getjsalary(header),
                jcompany       = self._getjcompany(header),
                jsummary       = self._getjsummary(header),
                jpostinfo      = str(self._pinjiezhiweixinxi(postInfo.select("div.bmsg.job_msg.inbox > p"))),
                jconcatinfo    = self._getconcatinfo(contactInfo),
                jsource        = 'http://www.51job.com'
            )

            with counter_lock2:
                self.cursor.execute(tempsql)
                self.connection.commit()

        # print("thread name {} : is exit.".format(threading.currentThread().getName()))
    def _getjsummary(self, header):
        try:
            return str(header.select("div.cn p.msg.ltype")[0].attrs['title'])
        except IndexError as identifier:
            return ""

    def _getjcompany(self, header):
        try:
            return str(header.select("div.cn p.cname a.catn")[0].attrs['title'])
        except IndexError as identifier:
            return ""

    def _getjsalary(self, header):
        try:
            return str(header.select("div.cn strong")[0].text)
        except IndexError as identifier:
            return ""

    def _getjpost(self, header):
        try:
            return str(header.select("div.cn h1")[0].attrs['title'])
        except IndexError as identifier:
            return ""

    def _getconcatinfo(self, element):
        try:
            return str(element.select("div.bmsg.inbox > p.fp")[0].text)
        except IndexError as identifier:
            return ""

    def _pinjiezhiweixinxi(self, resultset):
        result = []
        for temp in resultset:
            result.append("\n")
            result.append(str(temp.text).replace("'",""))
        return "".join(result)

    def _getjoblist(self):
        """获取职位列表"""
        while True:
            try:
                pagenum = next(self.__pagenum)
                # print("thread name {} : pagenum is {}".format(threading.currentThread().getName() , pagenum))
                listpage = self._gethtmlpage(
                    url=self.rawurl.format(
                        keywords=self.keywords, 
                        pagenum = pagenum
                    ),
                    encoding='gbk'
                )
                self._parserListpage(listpage)
            except StopIteration:
                break
        # print("thread name {} is exit.".format(threading.currentThread().getName()))

    def _parserListpage(self, listpage):
        table = listpage.select("div#resultList.dw_table div.el p.t1 span a")
        for item in table:
            self.hrefQueue.put(item.attrs['href'])
   
    def _inithreads(self, threadlist, target, threadsize=10):
        for i in range(threadsize):
            work = Thread(target=target, name=('job thread %s' % i))
            threadlist.append(work)

    def _starthreads(self, threads):
        for work in threads:
            work.start()

    def _jointhreads(self, threads):
        for work in threads:
            work.join()
Example #37
0
        data_rec.put(data)
        #print("received"+str(data))
        #message=input("->")
    s.close()


#if __name__==('__main__'):
#	 Main()


def threader():
    while True:
        Main()


t = threading.Thread(target=threader)
t.daemon = True
t.start()

#######################################################
# copy the lines above to the top of your code
# the lines below are your code
#

# here is an example
while True:
    time.sleep(1)
    #receive data routine
    if data_rec.qsize() > 0:
        print(data_rec.get())
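# The snippet above is truncated: Main(), the socket `s` and the `data_rec`
# queue are defined in the part that was cut off. A minimal, self-contained
# sketch of the same idea (a daemon thread receives from a socket and puts the
# data on a queue, while the main loop polls the queue) might look like this;
# the host/port values are placeholders.
import socket
import threading
import time
from queue import Queue

data_rec = Queue()

def receiver(host='127.0.0.1', port=5000):
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    s.connect((host, port))
    while True:
        data = s.recv(1024)
        if not data:          # peer closed the connection
            break
        data_rec.put(data)
    s.close()

t = threading.Thread(target=receiver, daemon=True)
t.start()

while True:
    time.sleep(1)
    if data_rec.qsize() > 0:
        print(data_rec.get())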
Example #38
0
class MainWindow(QWidget):
    def __init__(self, parent=None):
        super(MainWindow, self).__init__(parent)
        #        self.setWindowFlags(Qt.Window | Qt.FramelessWindowHint)
        #        self.setAttribute(Qt.WA_TranslucentBackground)
        self.resize(900, 700)
        self.__search_mode = {
            'fuzzy': 'fuzzy_search',
            'precise': 'precise_search',
            'reg': 'reg_search'
        }
        # self.__pbn_switch_view = None

        # create the window widgets
        self.__widget_frame = QLabel()

        # window title
        self.__lab_title_fram = QLabel()
        self.__lab_title = QLabel('搜索辅助工具')
        self.__lab_title.setAlignment(Qt.AlignCenter)

        self.__lab_open_tool = QLabel('打开文件方式')
        self.__ln_open_tool = QLineEdit()
        self.__pbn_open_tool = QToolButton()
        self.__pbn_open_tool.setText('选择...')
        self.__ln_open_tool.setFixedHeight(20)
        self.__ln_open_tool.setFixedWidth(150)
        self.__pbn_open_tool.setFixedSize(48, 20)
        self.__lab_title_fram.setFixedHeight(50)

        # search mode
        self.__lab_mode_fram = QLabel()
        self.__rbn_fuzzy = QRadioButton('模糊搜索')
        self.__rbn_precise = QRadioButton('精确搜索')
        self.__rbn_reg = QRadioButton('正则表达式搜索')
        self.__rbn_fuzzy.setChecked(True)
        self.__lab_mode_fram.setFixedHeight(22)

        # search pattern
        self.__lab_pattern_fram = QLabel()
        self.__ln_file_name = QLineEdit()
        self.__ln_file_name.setPlaceholderText('请输入搜索条件或正则表达式......')
        self.__rbn_reg_Iyes = QRadioButton('区分大小写')
        self.__rbn_reg_Ino = QRadioButton('不区分大小写')
        self.__lab_pattern_fram.setFixedHeight(20)

        # search path
        self.__lab_path_fram = QLabel()
        self.__ln_file_path = QLineEdit()
        self.__ln_file_path.setPlaceholderText('请选择或输入路径......')
        self.__pbn_file_path = QToolButton()
        self.__pbn_file_path.setText('浏览...')
        self.__rbn_search_file = QRadioButton('检索文件名')
        self.__rbn_search_content = QRadioButton('检索文件内容')
        self.__pbn_file_path.setFixedSize(48, 20)
        self.__lab_path_fram.setFixedHeight(20)

        # search state
        self.__lab_state_fram = QLabel()
        self.__lab_state = QLabel('状态:暂无搜索结果!')
        self.__pbn_search = QPushButton('开始')
        self.__pbn_stop = QPushButton('停止')
        self.__pbn_search.setFixedWidth(89)
        self.__pbn_stop.setFixedWidth(89)
        self.__lab_state_fram.setFixedHeight(35)

        # search result
        self.__tabView = QTabWidget()
        self.__browser_result = QListWidget()
        self.__browser_error = QTextBrowser()
        self.__tabView.addTab(self.__browser_result, '匹配结果')
        self.__tabView.addTab(self.__browser_error, '错误结果')

        self.__btn_group_type = QButtonGroup()
        self.__btn_group_type.addButton(self.__rbn_search_file)
        self.__btn_group_type.addButton(self.__rbn_search_content)
        self.__rbn_search_file.setChecked(True)

        # radiobutton group
        self.__btn_group_re_I = QButtonGroup()
        self.__btn_group_re_I.addButton(self.__rbn_reg_Iyes)
        self.__btn_group_re_I.addButton(self.__rbn_reg_Ino)
        self.__rbn_reg_Iyes.setChecked(True)

        # lines
        '''
        self.__line_1 = QFrame()
        self.__line_1.setFrameStyle(QFrame.HLine | QFrame.Sunken)
        '''

        # layout
        # open tool
        self.__layout_tool_choose = QHBoxLayout()
        self.__layout_tool_choose.addWidget(self.__ln_open_tool)
        self.__layout_tool_choose.addWidget(self.__pbn_open_tool)
        self.__layout_tool_choose.setSpacing(0)
        self.__layout_tool_choose.setContentsMargins(0, 0, 0, 0)

        self.__layout_open_tool = QHBoxLayout()
        self.__layout_open_tool.addWidget(self.__lab_open_tool)
        self.__layout_open_tool.addLayout(self.__layout_tool_choose)
        self.__layout_open_tool.setSpacing(2)

        # window title
        self.__layout_title = QHBoxLayout()
        self.__layout_title.addStretch(5)
        self.__layout_title.addWidget(self.__lab_title)
        self.__layout_title.addStretch(1)
        self.__layout_title.addLayout(self.__layout_open_tool)
        self.__layout_title.setContentsMargins(0, 0, 0, 20)
        self.__lab_title_fram.setLayout(self.__layout_title)

        # search mode
        self.__layout_search_mode = QHBoxLayout()
        self.__layout_search_mode.addWidget(self.__rbn_fuzzy)
        self.__layout_search_mode.addStretch()
        self.__layout_search_mode.addWidget(self.__rbn_precise)
        self.__layout_search_mode.addStretch()
        self.__layout_search_mode.addWidget(self.__rbn_reg)
        self.__layout_search_mode.setContentsMargins(60, 0, 60, 0)
        self.__lab_mode_fram.setLayout(self.__layout_search_mode)

        # search pattern
        self.__layout_search_reg_I = QHBoxLayout()
        self.__layout_search_reg_I.addWidget(self.__rbn_reg_Iyes)
        self.__layout_search_reg_I.addWidget(self.__rbn_reg_Ino)

        self.__layout_pattern = QHBoxLayout()
        self.__layout_pattern.addWidget(self.__ln_file_name)
        self.__layout_pattern.addLayout(self.__layout_search_reg_I)
        self.__layout_pattern.setContentsMargins(0, 0, 0, 0)
        self.__lab_pattern_fram.setLayout(self.__layout_pattern)

        # search path
        self.__layout_choose_path = QHBoxLayout()
        self.__layout_choose_path.addWidget(self.__ln_file_path)
        self.__layout_choose_path.addWidget(self.__pbn_file_path)
        self.__layout_choose_path.setSpacing(0)

        self.__layout_path = QHBoxLayout()
        self.__layout_path.addLayout(self.__layout_choose_path)
        self.__layout_path.addWidget(self.__rbn_search_file)
        self.__layout_path.addWidget(self.__rbn_search_content)
        self.__layout_path.setContentsMargins(0, 0, 0, 0)
        self.__lab_path_fram.setLayout(self.__layout_path)

        # search state
        self.__layout_state = QHBoxLayout()
        self.__layout_state.addWidget(self.__lab_state)
        self.__layout_state.addWidget(self.__pbn_search)
        self.__layout_state.addWidget(self.__pbn_stop)
        self.__layout_state.setContentsMargins(0, 0, 0, 10)
        self.__lab_state_fram.setLayout(self.__layout_state)

        # top layout
        self.__layout_top = QVBoxLayout()
        self.__layout_top.addWidget(self.__lab_title_fram)
        self.__layout_top.addWidget(self.__lab_mode_fram)
        self.__layout_top.addWidget(self.__lab_pattern_fram)
        self.__layout_top.addWidget(self.__lab_path_fram)
        self.__layout_top.addWidget(self.__lab_state_fram)
        self.__layout_top.addWidget(self.__tabView)
        self.__layout_top.setSpacing(10)
        self.__widget_frame.setLayout(self.__layout_top)

        self.__layout_fram = QGridLayout()
        self.__layout_fram.addWidget(self.__widget_frame, 0, 0, 1, 1)
        self.__layout_fram.setContentsMargins(0, 0, 0, 0)
        self.setLayout(self.__layout_fram)

        # set object name
        self.__widget_frame.setObjectName('fram')
        self.__lab_title.setObjectName('lab_title')
        self.__ln_open_tool.setObjectName('ln_open_tool')
        self.__lab_mode_fram.setObjectName('mode_fram')
        self.__ln_file_name.setObjectName('ln_pattern')
        self.__ln_file_path.setObjectName('ln_path')
        self.__lab_state.setObjectName('state')
        self.__tabView.setObjectName('tabView')
        self.__browser_result.setObjectName('browser_result')
        self.__browser_error.setObjectName('browser_error')

        self.setStyleSheet(
            '#fram{'
            'border-image: url(Images/bg);'
            '}'
            '#lab_title{'
            'color: white;'
            'font-size: 18pt;'
            '}'
            '#open_tool{'
            'color: black;'
            '}'
            '#mode_fram{'
            # 'border-top: 1px solid rgba(20, 20, 20, 100);'
            # 'border-bottom: 1px solid rgba(20, 20, 20, 100);'
            'background: rgba(0, 0, 0, 40);'
            '}'
            '#ln_open_tool, #ln_path{'
            'border-top-left-radius:    2px;'
            'border-bottom-left-radius: 2px;'
            '}'
            '#ln_pattern{'
            'border-radius: 2px;'
            '}'
            '#state{'
            'background: rgba(0, 0, 0, 40);'
            'border-radius: 2px;'
            'padding: 1px;'
            'color: rgb(240, 240, 240);'
            '}'
            'QTabBar::tab {'
            'border: 0;'
            'width:  90px;'
            'height: 20px;'
            'margin: 0 2px 0 0;'  # top right bottom left
            # 'border-top-left-radius: 5px;'
            # 'border-top-right-radius: 5px;'
            'color: rgb(200, 255, 255;);'
            '}'
            'QTabBar::tab:selected{'
            'background: rgba(25, 0, 0, 40);'
            'border-left: 1px solid rgba(255, 255, 255, 200);'
            'border-top: 1px solid rgba(255, 255, 255, 200);'
            'border-right: 1px solid rgba(255, 255, 255, 200);'
            '}'
            'QTabWidget:pane {'
            'border: 1px solid rgba(255, 255, 255, 200);'
            'background: rgba(0, 0, 0, 80);'
            '}'
            '#browser_result, #browser_error{'
            'background: rgba(0, 0, 0, 0);'
            'border: 0;'
            '}'
            'QLineEdit{'
            'background: rgba(0, 0, 0, 40);'
            'border: 1px solid rgba(220, 220, 220, 200);'
            'color: white;'
            'height: 20px;'
            '}'
            'QPushButton{'
            'background: rgba(0, 0, 0, 100);'
            'border-radius: 5px;'
            'height: 20px;'
            'color: white;'
            '}'
            'QPushButton::hover{'
            'background: rgba(0, 0, 0, 150);'
            '}'
            'QToolButton{'
            'background: rgba(0, 0, 0, 100);'
            'color: white;'
            'border-top-right-radius:    2px;'
            'border-bottom-right-radius: 2px;'
            '}'
            'QToolButton::hover{'
            'background: rgba(0, 0, 0, 150);'
            '}')

        self.__ln_file_name.setFocus()
        self.__pbn_search.setShortcut(Qt.Key_Return)

        # connect signals/slots
        self.__pbn_file_path.clicked.connect(self.choose_path)
        self.__pbn_search.clicked.connect(self.pbn_search_clicked)
        self.__pbn_stop.clicked.connect(self.pbn_stop)
        self.__pbn_open_tool.clicked.connect(self.choose_open_tool)
        self.__browser_result.doubleClicked.connect(self.listitem_clicked)

        # queues for sharing data between threads
        queue_size = 10000
        self.__queue_result = Queue(queue_size)
        self.__queue_error = Queue(queue_size)

        # flag indicating whether a search is running
        self.__searching = False

        # flag used to force the worker threads to exit
        self.__thread_killer = False

    # override the mouse press event
    def mousePressEvent(self, event):
        if event.button() == Qt.LeftButton:
            self.offset = event.globalPos() - self.pos()

    # override the mouse move event
    def mouseMoveEvent(self, event):
        if event.buttons() == Qt.LeftButton:
            self.move(event.globalPos() - self.offset)

    # detect an installed text editor
    def set_open_tool(self):
        if platform.architecture() == ('32bit', 'WindowsPE'):
            possible_dir = [
                'C:\\Program Files\\Sublime Text 2', 'C:\\Sublime Text 2',
                'D:\\Program Files\\Sublime Text 2', 'D:\\Sublime Text 2',
                'E:\\Program Files\\Sublime Text 2', 'E:\\Sublime Text 2',
                'F:\\Program Files\\Sublime Text 2', 'F:\\Sublime Text 2',
                'C:\\Program Files\\Notepad++', 'C:\\notepad++',
                'D:\\Program Files\\Notepad++', 'D:\\notepad++',
                'E:\\Program Files\\Notepad++', 'E:\\notepad++',
                'F:\\Program Files\\Notepad++', 'F:\\notepad++',
                'C:\\Windows\\System32'
            ]
        elif platform.architecture() == ('32bit', 'ELF'):
            possible_dir = ['/usr/bin']
        else:
            possible_dir = []  # unknown platform: nothing to scan
        for rootdir in possible_dir:
            for root, dirs, files in walk(rootdir):
                for file in files:
                    if file == 'sublime_text.exe' or file == 'notepad++.exe' or file == 'notepad.exe':
                        self.__ln_open_tool.setText(join(root, file))
                        return

    # search by file name
    def search_from_filename(self,
                             filepath,
                             filename,
                             mode='fuzzy_search',
                             I=True):
        # check arguments of searching
        if filepath == '' or not exists(filepath):
            return False
        if mode not in self.__search_mode.values():
            return False
        if filename == '':
            return False

        # count
        count = 0

        # fuzzy mode
        if mode == self.__search_mode['fuzzy']:
            for root, dirs, files in walk(filepath):
                for each_file in files:
                    # kill subThread
                    if self.__thread_killer == True:
                        return

                    if filename in each_file:
                        count += 1
                        self.__lab_state.setText('正在搜索......已搜到 %d 个文件' %
                                                 count)
                        self.__queue_result.put(join(root, each_file))
                        self.__tabView.setTabText(0, '匹配结果(%d)' % count)
        # precise mode
        elif mode == self.__search_mode['precise']:
            for root, dirs, files in walk(filepath):
                for each_file in files:
                    # kill subThread
                    if self.__thread_killer == True:
                        return

                    if filename == splitext(
                            each_file)[0] or filename == each_file:
                        count += 1
                        self.__lab_state.setText('正在搜索......已搜到 %d 个文件' %
                                                 count)
                        self.__queue_result.put(join(root, each_file))
                        self.__tabView.setTabText(0, '匹配结果(%d)' % count)
        # regular expression mode
        elif mode == self.__search_mode['reg']:
            if I:
                pattern = re.compile(r'%s' % filename)
            else:
                pattern = re.compile(r'%s' % filename, re.I)

            for root, dirs, files in walk(filepath):
                for each_file in files:
                    # kill subThread
                    if self.__thread_killer == True:
                        return

                    if re.search(pattern, each_file):
                        count += 1
                        self.__lab_state.setText('正在搜索......已搜到 %d 个文件' %
                                                 count)
                        self.__queue_result.put(join(root, each_file))
                        self.__tabView.setTabText(0, '匹配结果(%d)' % count)
        self.__lab_state.setText('搜索完毕! 共搜到 %d 个文件' % count)  # finished
        self.__searching = False  # clear the searching flag

    # search file contents
    def search_from_content(self, path, content, mode='fuzzy_search', I=True):
        if path == '' or not exists(path):
            return False
        if mode not in self.__search_mode.values():
            return False
        if content == '':
            return False
        pass_file_count = 0
        error_number = 0
        current_file = ''
        processing_file = ''
        match_files_count = 0
        if mode == self.__search_mode['reg']:
            if I:
                pattern = re.compile(r'%s' % content)
            else:
                pattern = re.compile(r'%s' % content, re.I)
            for root, dirs, files in walk(path):
                for each_file in [
                        file for file in files if file.endswith('.h')
                        or file.endswith('.cpp') or file.endswith('.cs')
                ]:
                    current_file = join(root, each_file)
                    pass_file_count += 1
                    self.__lab_state.setText('正在搜索......%s' % current_file)
                    try:
                        for line_number, line in enumerate(open(current_file)):
                            # kill subThread
                            if self.__thread_killer == True:
                                return

                            if re.search(pattern, line):
                                if processing_file != current_file:
                                    self.__queue_result.put('\n%s' %
                                                            current_file)
                                    processing_file = current_file
                                    match_files_count += 1
                                self.__queue_result.put(
                                    'line %s: %s' %
                                    (line_number, line.strip()))
                    except Exception as error:
                        self.__queue_error.put("%s\n(%s)\n" %
                                               (error, current_file))
                        pass_file_count -= 1
                        error_number += 1
                        continue
                    self.__tabView.setTabText(0,
                                              '匹配结果(%d)' % match_files_count)
        else:
            for root, dirs, files in walk(path):
                for each_file in [
                        file for file in files if file.endswith('.h')
                        or file.endswith('.cpp') or file.endswith('.cs')
                        or file.endswith('.txt') or file.endswith('.py')
                ]:
                    current_file = join(root, each_file)
                    pass_file_count += 1
                    self.__lab_state.setText('正在搜索......%s' % current_file)
                    try:
                        for line_number, line in enumerate(open(current_file)):
                            # kill subThread
                            if self.__thread_killer == True:
                                return

                            if content in line:  # match found
                                if processing_file != current_file:  # first hit in a new file
                                    self.__queue_result.put(
                                        '\n%s' % current_file)  # enqueue the file name
                                    processing_file = current_file  # update the current-file marker
                                    match_files_count += 1
                                self.__queue_result.put(
                                    'line %s: %s' %
                                    (line_number, line.strip()))  # enqueue the matching line
                    except Exception as error:
                        self.__queue_error.put("%s\n(%s)\n" %
                                               (error, current_file))
                        pass_file_count -= 1
                        error_number += 1
                        continue
                    self.__tabView.setTabText(0,
                                              '匹配结果(%d)' % match_files_count)
        # self.__queue_result.put()
        self.__lab_state.setText(
            '搜索完毕!成功匹配 %d 个文件,处理 %s 个文件,失败 %s 文件。' %
            (match_files_count, pass_file_count, error_number))
        self.__searching = False

    # click handler for the choose-path button
    def choose_path(self):
        path = QFileDialog.getExistingDirectory()
        if path != '':
            path = sep.join(path.split('/'))
            self.__ln_file_path.setText(path)

    # choose the tool used to open files
    def choose_open_tool(self):
        path = QFileDialog.getOpenFileName()
        if path[0] != '':
            self.__ln_open_tool.setText(path[0])

    # display search results
    def show_search_result(self):
        """将搜索结果加载到界面,供用户查看和操作"""
        line_block = []  # 定义临时列表,成批加载,避免刷新频率过高造成界面闪烁
        block_size = 10  # 一次性加载的个数
        while self.__searching or self.__queue_result.qsize():
            # kill subThread
            if self.__thread_killer == True:
                return

            # if self.__searching or self.__queue_result.qsize() >= block_size:     // never forget this bug (producer-consumer problem)
            if self.__queue_result.qsize() >= block_size:  # at least block_size items queued
                for i in range(block_size):  # take block_size items
                    line_block.append(self.__queue_result.get())  # dequeue
                self.__browser_result.addItems(
                    line_block)  # add block_size entries at once
                line_block.clear()  # clear the temporary list
            elif self.__queue_result.qsize() > 0:  # fewer than block_size items queued
                item = self.__queue_result.get()  # dequeue one item
                self.__browser_result.addItem(QListWidgetItem(item))  # add it to the UI
            #self.__browser.setCurrentRow(self.__browser.count()-1)                  # keep the last item current so the list keeps scrolling
            sleep(0.05)  # give the UI event loop time to run and avoid freezing
        #self.__pbn_search.setEnabled(True)

    # display error results
    def show_error_result(self):
        """打印略过的文件和出错原因,多为 I/O Error"""
        count = 0
        while self.__queue_error.qsize() or self.__searching:
            # kill subThread
            if self.__thread_killer == True:
                return

            if self.__queue_error.qsize() <= 0:
                sleep(0.05)  # avoid a busy-wait while the error queue is empty
                continue
            self.__browser_error.append(self.__queue_error.get())
            count += 1
            self.__tabView.setTabText(1, '错误结果(%d)' % count)

    # click handler for the search button
    def pbn_search_clicked(self):
        """Run a search using the arguments from the UI."""
        # read values from the UI
        file_path = self.__ln_file_path.text()
        file_name = self.__ln_file_name.text()

        # validate the arguments
        if file_path == '':
            QMessageBox(QMessageBox.Warning, '缺少参数!', '请输入搜索路径!',
                        QMessageBox.Ok, self).exec_()
            return
        if file_name == '':
            QMessageBox(QMessageBox.Warning, '缺少参数!', '请输入匹配条件!',
                        QMessageBox.Ok, self).exec_()
            return

        # determine the search mode
        mode = self.__search_mode['fuzzy']
        if self.__rbn_reg.isChecked():
            mode = self.__search_mode['reg']
        elif self.__rbn_fuzzy.isChecked():
            mode = self.__search_mode['fuzzy']
        elif self.__rbn_precise.isChecked():
            mode = self.__search_mode['precise']

        # case-sensitivity flag
        I = True
        if self.__rbn_reg_Ino.isChecked():
            I = False

        self.__browser_result.clear()
        self.__browser_error.clear()
        self.__tabView.setTabText(0, '匹配结果(0)')
        self.__tabView.setTabText(1, '错误结果(0)')
        self.__searching = True

        # start a worker thread for the background directory walk
        self.__thread_killer = False
        if self.__rbn_search_file.isChecked():
            self.__lab_state.setText('正在搜索......已搜索到 0 个文件')
            self.__sub_thread_search = Thread(target=self.search_from_filename,
                                              args=(file_path, file_name, mode,
                                                    I))
            self.__sub_thread_search.start()
        else:
            self.__lab_state.setText('正在搜索......')
            self.__sub_thread_search = Thread(target=self.search_from_content,
                                              args=(file_path, file_name, mode,
                                                    I))
            self.__sub_thread_search.start()

        # start a worker thread to display search results
        self.__sub_thread_show_result = Thread(target=self.show_search_result)
        self.__sub_thread_show_result.start()

        # start a worker thread to display error results
        self.__sub_thread_show_error = Thread(target=self.show_error_result)
        self.__sub_thread_show_error.start()

        # self.__pbn_search_file.setEnable(False)
        # self.__pbn_search_content.setEnable(False)

    # click handler for the stop button
    def pbn_stop(self):
        if not self.__searching:
            return
        self.__thread_killer = True
        while self.__queue_result.qsize():
            self.__queue_result.get()
        while self.__queue_error.qsize():
            self.__queue_error.get()
        self.__lab_state.setText('搜索已停止!')
        self.__searching = False

    # double-click handler for a search result
    def listitem_clicked(self):
        """Double click to open the file from search result"""
        file_path = self.__browser_result.currentItem().text().strip()
        read_tool = self.__ln_open_tool.text()
        if not exists(file_path):
            QMessageBox.warning(self, '错误!',
                                '请双击文件名\n%s 不是文件或打不开!' % file_path,
                                QMessageBox.Ok)
            return
        if splitext(file_path)[1] in ['.jpg', '.png', '.jpeg', '.gif']:
            file_path = file_path.replace(' ', r'\ ')  # escape spaces in the path
            system('%s' % file_path)
        else:
            system('"%s" %s' % (read_tool, file_path))
Example #39
0
class MinicapStream:
    __instance = None
    __mutex = threading.Lock()

    def __init__(self, host="127.0.0.1", port=1717):
        self.IP = host  # server IP
        self.PORT = port  # port to connect to
        self.Pid = 0  # process ID
        self.banner = Banner()  # holds the banner header information
        #         self.minicapSocket = socket.socket(socket.AF_INET,socket.SOCK_STREAM)
        self.minicapSocket = None
        self.ReadImageStreamTask = None
        self.push = None
        self.picture = Queue()
        self.data = b''

    @staticmethod
    def getBuilder():
        """Return a single instance of TestBuilder object """
        if (MinicapStream.__instance == None):
            MinicapStream.__mutex.acquire()
            if (MinicapStream.__instance == None):
                MinicapStream.__instance = MinicapStream()
            MinicapStream.__mutex.release()
        return MinicapStream.__instance

    def get_d(self):
        print(self.picture.qsize())

    def run(self):
        # start running:
        # open the socket connection in a background thread
        p1 = threading.Thread(target=self.ReadImageStream)
        p1.start()
        # p1.join()

    def ReadImageStream(self):
        self.minicapSocket = socket.socket(
            socket.AF_INET, socket.SOCK_STREAM)  # TCP socket for network communication
        self.minicapSocket.connect((self.IP, self.PORT))
        # read the image stream into the queue

        readBannerBytes = 0
        bannerLength = 2
        readFrameBytes = 0
        frameBodylength = 0
        dataBody = b""
        while True:
            reallen = self.minicapSocket.recv(4096)
            length = len(reallen)
            if not length:
                break  # an empty recv() means the connection was closed
            cursor = 0
            while cursor < length:
                # parse the banner header
                if readBannerBytes < bannerLength:
                    if readBannerBytes == 0:
                        self.banner.Version = reallen[cursor]
                    elif readBannerBytes == 1:
                        bannerLength = reallen[cursor]
                        self.banner.Length = bannerLength
                    elif readBannerBytes in [2, 3, 4, 5]:
                        self.banner.Pid += (reallen[cursor] <<
                                            ((readBannerBytes - 2) * 8)) >> 0
                    elif readBannerBytes in [6, 7, 8, 9]:
                        self.banner.RealWidth += (reallen[cursor] << (
                            (readBannerBytes - 6) * 8)) >> 0
                    elif readBannerBytes in [10, 11, 12, 13]:
                        self.banner.RealHeight += (reallen[cursor] << (
                            (readBannerBytes - 10) * 8)) >> 0
                    elif readBannerBytes in [14, 15, 16, 17]:
                        self.banner.VirtualWidth += (reallen[cursor] << (
                            (readBannerBytes - 14) * 8)) >> 0
                    elif readBannerBytes in [18, 19, 20, 21]:
                        self.banner.VirtualHeight += (reallen[cursor] << (
                            (readBannerBytes - 18) * 8)) >> 0
                    elif readBannerBytes == 22:
                        self.banner.Orientation = reallen[cursor] * 90
                    elif readBannerBytes == 23:
                        self.banner.Quirks = reallen[cursor]
                    cursor += 1
                    readBannerBytes += 1
                    if readBannerBytes == bannerLength:
                        print(self.banner.toString())
                elif readFrameBytes < 4:
                    # the first 4 bytes of each frame are not image data; they carry the frame body length
                    frameBodylength = frameBodylength + (
                        (reallen[cursor] << (readFrameBytes * 8)) >> 0)
                    cursor += 1
                    readFrameBytes += 1
                    # print('{} - {} '.format(cursor,frameBodylength))
                else:
                    # actual image data: of the n bytes received, bytes 4..n belong to the frame body and must be saved
                    # print('{} - {} - {} '.format(length,cursor, frameBodylength))
                    if length - cursor >= frameBodylength:
                        dataBody = dataBody + (reallen[cursor:(
                            cursor + frameBodylength)])
                        if dataBody[0] != 0xFF or dataBody[1] != 0xD8:
                            return
                        self.picture.put(dataBody)
                        # self.save_file('d:/pic.png', dataBody)
                        cursor += frameBodylength
                        frameBodylength = 0
                        readFrameBytes = 0
                        dataBody = b""
                    else:
                        dataBody = dataBody + reallen[cursor:length]
                        frameBodylength -= length - cursor
                        readFrameBytes += length - cursor
                        cursor = length

    # adb shell LD_LIBRARY_PATH=/data/local/tmp /data/local/tmp/minicap -P 1200x1920@1200x1920/0
    #             adb forward tcp:1313 localabstract:minicap

    def save_file(self, file_name, data):
        print(file_name)
        file = open(file_name, "wb")
        file.write(data)
        file.flush()
        file.close()
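    # A minimal sketch of the framing that ReadImageStream() decodes above:
    # after the banner, every frame is a 4-byte little-endian length followed
    # by a JPEG body (which always starts with 0xFF 0xD8). This helper works
    # on an in-memory byte buffer and is illustrative only, not part of the
    # original class.
    @staticmethod
    def parse_frames(buffer):
        frames = []
        cursor = 0
        while cursor + 4 <= len(buffer):
            body_len = int.from_bytes(buffer[cursor:cursor + 4], 'little')
            cursor += 4
            if cursor + body_len > len(buffer):
                break                      # incomplete trailing frame
            body = buffer[cursor:cursor + body_len]
            if body[:2] == b'\xff\xd8':    # sanity check: JPEG SOI marker
                frames.append(body)
            cursor += body_len
        return frames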
Example #40
0
def shai_bagons_bad_ass_distributed_space_time_backprojection(
        hfr_hr_pred, lfr_hr_in, hfr_lr_in, base_folder):
    # inputs are in H-W-T-C np arrays
    _rusage('badbp init')
    t = time.time()
    if bp_debug:
        print('-badbp- hfr_hr_pred {}, lfr_hr_in {}, hfr_lr_in {}'.format(
            hfr_hr_pred.shape if hfr_hr_pred is not None else None,
            lfr_hr_in.shape if lfr_hr_in is not None else None,
            hfr_lr_in.shape if hfr_lr_in is not None else None))

    rate = hfr_lr_in.shape[2] // lfr_hr_in.shape[2]
    assert (rate > 1)
    #print('-badbp- submittinig with temporal sample rate = {}'.format(rate))
    max_chunk_size = max(
        24 // rate,
        1)  # number of hfr frames to process at a chunk. at least rate frames
    tag = '{}/{}x{}x{}/'.format(base_folder, lfr_hr_in.shape[0],
                                lfr_hr_in.shape[1], hfr_lr_in.shape[2])

    num_workers = max(8, min(24, lfr_hr_in.shape[2] // max_chunk_size))
    cid = 0
    fr_ = 0

    chunk_q = Queue()
    jid_q = Queue()
    err_q = Queue()
    # global data
    # allocate room for output
    hfr_hr_pred = np.zeros([
        lfr_hr_in.shape[0], lfr_hr_in.shape[1], hfr_lr_in.shape[2],
        hfr_lr_in.shape[3]
    ],
                           dtype=np.float32)
    _rusage('badbp allocating out')

    # multi threading submission
    class Worker(Thread):
        def __init__(self, inq, outq, errq):
            super(Worker, self).__init__()
            self.inq = inq
            self.outq = outq
            self.errq = errq
            self.daemon = True
            self.start()

        def run(self):
            # first part - submit all chunks
            while True:
                item = self.inq.get()
                if item is None:
                    # "stop" signal
                    self.inq.task_done()
                    break
                fr_, to_, cid = item
                hfr_lr_folder = '{}-c{}-hfr_lr'.format(tag, cid)
                _write_chunk(hfr_lr_in[..., fr_ * rate:to_ * rate, :],
                             hfr_lr_folder)
                lfr_hr_folder = '{}-c{}-lfr_hr'.format(tag, cid)
                _write_chunk(lfr_hr_in[..., fr_:to_, :], lfr_hr_folder)
                pyargs = '--lfr_hr_in {} --hfr_lr_in {}'.format(
                    lfr_hr_folder, hfr_lr_folder)
                if hfr_hr_pred is not None:
                    hfr_hr_folder = '{}-c{}-hfr_hr_in'.format(tag, cid)
                    _write_chunk(hfr_hr_pred[..., fr_ * rate:to_ * rate, :],
                                 hfr_hr_folder)
                    pyargs = '{} --hfr_hr_pred {}'.format(
                        pyargs, hfr_hr_folder)
                output_folder = '{}-c{}-output'.format(tag, cid)
                pyargs = '{} --output {}'.format(pyargs, output_folder)
                jid = _submit_job_and_get_jid(pyargs, tag)
                self.outq.put([jid, output_folder, fr_, to_])
                self.inq.task_done()
            # second part - gather
            while True:
                item = self.outq.get()
                if item is None:
                    # "stop" signal
                    self.outq.task_done()
                    break
                jid, output_folder, fr_, to_ = item

                status = _check_job_status(jid)
                if status == 'done':
                    #print('chunk {}:{} done!'.format(fr_, to_))
                    hfr_hr_pred[..., rate * fr_:rate * to_, :] = _read_chunk(
                        output_folder, (to_ - fr_) * rate)
                elif status == 'exit':
                    #print('chunk {}:{} jid={} failed filling in rubish'.format(fr_, to_, jid))
                    hfr_hr_pred[..., rate * fr_:rate * to_, :] = 0.5
                    try:
                        with open('{}{}.e'.format(tag, jid), 'r') as R:
                            for l in R.readlines():
                                z = 0
                                #print('\t-jid{}-{}'.format(jid, l.rstrip()))
                    except:
                        pass
                    self.errq.put(jid)
                else:
                    #print('\t jid {} status {}'.format(jid, status))
                    # resubmit
                    self.outq.put(item)
                self.outq.task_done()
                time.sleep(1)

    # start the workers
    workers = [Worker(chunk_q, jid_q, err_q) for _ in range(num_workers)]
    _rusage('badbp workers started')

    while fr_ < lfr_hr_in.shape[2]:
        to_ = min(lfr_hr_in.shape[2], fr_ + max_chunk_size)
        chunk_q.put((fr_, to_, cid))
        cid += 1
        fr_ = to_
    assert (rate * fr_ == hfr_lr_in.shape[2])

    # signal workers to move to next stage
    for _ in range(num_workers):
        chunk_q.put(None)
    _rusage('badbp done distributing')

    chunk_q.join()  # wait for all jobs to be submitted
    #print('-badbp- done submitting all chunks to {} workers \t {:.2f} [sec]'.format(num_workers, time.time() - t))

    jid_q.join()  # wait for all frames to be collected
    _rusage('badbp frames collected')

    # signal workers to move to exit
    for _ in range(num_workers):
        jid_q.put(None)
    jid_q.join()  # wait for all threads to get the signal

    # make sure all threads exited
    # assert(all([not w_.is_alive() for w_ in workers]))

    # cleanup
    if err_q.qsize() == 0:
        shutil.rmtree(path=tag, ignore_errors=True)
    else:
        # drain the error queue; keep the temporary folder around for inspection
        while True:
            try:
                ejid = err_q.get(block=False)
            except Empty:
                break
            err_q.task_done()
            #print('-badbp- job id {} had an error'.format(ejid))
    _rusage('badbp done')
    #print('-badbp- done  \t {:.2f} [sec]'.format(time.time() - t))
    return np.clip(hfr_hr_pred, 0., 1.)
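The function above drives a two-stage pipeline with sentinel values: each worker first submits chunks taken from an input queue, then gathers results from an output queue, and one None per worker ends each stage. A minimal, self-contained sketch of that pattern (illustrative names, not part of the original code):

from queue import Queue
from threading import Thread

def worker(submit_q, gather_q):
    # stage 1 - submit
    while True:
        item = submit_q.get()
        if item is None:            # "stop" signal for this stage
            submit_q.task_done()
            break
        gather_q.put(item * 2)      # stand-in for submitting a job
        submit_q.task_done()
    # stage 2 - gather
    while True:
        item = gather_q.get()
        if item is None:            # "stop" signal for this stage
            gather_q.task_done()
            break
        gather_q.task_done()        # stand-in for collecting a result

if __name__ == '__main__':
    submit_q, gather_q = Queue(), Queue()
    workers = [Thread(target=worker, args=(submit_q, gather_q)) for _ in range(4)]
    for w in workers:
        w.start()
    for chunk in range(10):
        submit_q.put(chunk)
    for _ in workers:               # one sentinel per worker ends stage 1
        submit_q.put(None)
    submit_q.join()                 # wait for all chunks to be submitted
    gather_q.join()                 # wait for all results to be collected
    for _ in workers:               # one sentinel per worker ends stage 2
        gather_q.put(None)
    for w in workers:
        w.join()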
Example #41
0
class CoreScrapeThread(CoreScrape):
    """
    Core Scrape Thread.

    Uses multiple threads to request pages and parse their content.
    A valid rotator must be passed so that each request is made through a new proxy,
    making it less likely to be red flagged as a bot or scraper by internet
    service providers. The user may pass a parser (a CoreScrape class or a custom
    class with a 'parse' method) to parse each response and avoid having to store
    the whole page for postprocessing.
    This controller also gives the user the option to set a timer, in seconds,
    to raise a timeout. The timer is armed during the 'start_threads' method if an
    integer was provided to param 'timeout', and disarmed in the 'wait_for_threads'
    method.

    Params:
        nthreads: int. Desired number of threads. Once the method 'start_threads' is
            called, the controller will try to split the given input into 'nthreads'
            chunks. If it is not possible to split into 'nthreads' chunks, the actual
            number of threads is available in 'actualnthreads'.
        rotator: corescrape.proxy.Rotator (preferably). Uses this rotator to make
            requests using different proxies and user agents. It is also possible to
            pass the 'requests' module to this parameter, but that is not advised as
            the control of proxies and user agents would not be automatic.
        parser: corescrape.pgparser.SimpleParser, a class based on it, or None. Uses
            this to parse the page content and extract the useful information, making
            it less memory expensive. If no argument is given, the thread controller
            will return a list of the full pages collected.
        timeout: int or None. Time in seconds to configure the timeout process.
            Sets up a timer to raise an event and stop the threads once the time is
            reached.
        logoperator: corescrape.logs.LogOperator or None. Log to be fed with process
            runtime information.
    """

    def __init__(self, nthreads, rotator, parser=None, timeout=None,
                 logoperator=None):
        """Constructor."""

        if timeout is not None and not isinstance(timeout, int):
            raise TypeError("Param. 'timeout' must be 'int' or 'NoneType'")

        # inputs
        self.nthreads = nthreads
        self.actualnthreads = nthreads
        self.rotator = rotator
        self.parser = parser
        self.timeout = timeout  # CAREFUL! This is not timeout for requests
        self.timeoutset = False

        # control attrs
        self.queue = Queue()
        self.event = corescrape_event.CoreScrapeEvent(logoperator=logoperator)
        self.threads = []

        super().__init__(logoperator=logoperator)

    def __split(self, a):
        """
        Tries to split the input into chunks for each thread.

        Input must be a list.
        """

        if not isinstance(a, list):
            raise TypeError("Param 'a' must be 'list'")

        n = self.nthreads  # desired number of threads
        k, m = divmod(len(a), n)
        split = [a[i * k + min(i, m):(i + 1) * k + min(i + 1, m)] for i in range(n)]
        split = [part for part in split if part]  # drops empty chunks
        # actual number of threads. Sometimes differs from 'nthreads'
        self.actualnthreads = len(split)
        return split

    def __warn_wait_threads(self):
        """Produce warning to wait for threads if needed."""

        if self.threads:
            warn(
                'There are threads running. Wait for them to stop before calling '
                'this method'
            )
            return True
        return False

    def __set_timeout(self):
        """
        If seconds for timeout were informed in the constructor, will set an alarm
        for timeout. Once timeout is reached, the iteration is broken and return
        as expected.
        """

        if self.timeout:
            signal.signal(signal.SIGALRM, alarm_handler)
            signal.alarm(self.timeout)
            self.log('CoreScrapeThread set the timeout for {} seconds.'.format(
                self.timeout), tmsg='info')
            self.timeoutset = True

    def __disarm_timeout(self):
        """Turn off the timeout."""

        if self.timeoutset:
            self.timeoutset = False
            signal.alarm(0)
            self.log('CoreScrapeThread disarmed the timeout.', tmsg='info')

    def __check_am_i_the_last(self):
        """Check if this thread is the last and if it should set an event."""

        condition = self.queue.qsize() + 1 >= self.actualnthreads
        condition = condition and self.event.state.is_EXECUTING()
        if condition:
            self.event.state.set_DUTY_FREE()

    def __iterate(self, threadid, data, *args):
        """Do iterations in threads, each one calling the passed code."""

        # pylint: disable=unused-argument

        self.log('Starting iteration in threadid {} for {} items'.format(
            threadid, len(data)))
        res = []
        for url in data:
            # the reason here does not matter. If it is set, break out
            if self.event.is_set(): break

            try:
                page = self.rotator.request(url, self.event, threadid=threadid)
            except Exception:
                # any request failure aborts this thread
                self.event.state.set_ABORT_THREAD()
                break

            if page is None: continue  # not able to retrieve the page

            if self.parser is None:
                res.append(page)
                self.log('Storing whole response for {}. Thread {}'.format(
                    url, threadid))
            elif page.status_code == 404:
                self.log('URL {} returned a 404. Thread {}'.format(url, threadid),
                         tmsg='warning')
                res.append({url: None})  # points it was collected but useless
            else:
                _res = self.parser.parse(page, threadid=threadid)
                if not _res:
                    self.log('URL {} could not be parsed. Thread {}'.format(
                        url, threadid))
                    continue  # no info collected, must go on
                self.log('URL {} collected. Thread {}'.format(url, threadid),
                         tmsg='header')
                res.append({url: _res})

        self.__check_am_i_the_last()
        return res

    def start_threads(self, to_split_params, *fixed_args):
        """Starts threads."""

        def test_if_urls(p):
            return [a.startswith('http://') or a.startswith('https://') for a in p]

        # pylint: disable=no-value-for-parameter

        abort = self.__warn_wait_threads()
        if abort:
            return False

        if not all(test_if_urls(to_split_params)):
            raise ValueError("Every URL must begin with 'http://' or 'https://'")

        self.log('Starting threads for {} items'.format(len(to_split_params)))

        self.threads = []
        self.event.state.set_EXECUTING()
        for threadid, split in enumerate(self.__split(to_split_params)):
            pargs = (threadid, split, *fixed_args)
            thread = Thread(
                target=lambda q, *args: q.put(self.__iterate(*args)),
                args=(self.queue, *pargs)
            )
            thread.start()
            self.threads.append(thread)

        self.__set_timeout()

        return True

    def wait_for_threads(self):
        """Wait lock for threads."""

        try:
            self.event.wait()
        except KeyboardInterrupt:
            self.event.state.set_ABORT_USER()
        except CoreScrapeTimeout:
            self.event.state.set_TIMEOUT()
        finally:
            self.__disarm_timeout()
            for thread in self.threads:
                thread.join()
            self.event.clear()
            self.threads = []

    def join_responses(self):
        """Join responses from the threads."""

        abort = self.__warn_wait_threads()
        if abort:
            return []

        res = []
        while not self.queue.empty():
            res += self.queue.get()
        return res

    def is_sentenced(self):
        """
        Informs if the thread controller is sentenced due to the last event state.
        """

        sentenced = self.event.state.is_sentenced()
        if sentenced:
            self.event.state.set_FINISHED()
        return sentenced
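A hedged usage sketch for CoreScrapeThread follows; 'my_rotator' stands in for a corescrape.proxy.Rotator instance and is not defined here, so this is illustrative rather than runnable as-is:

urls = ['https://example.com/a', 'https://example.com/b']
controller = CoreScrapeThread(nthreads=2, rotator=my_rotator, timeout=120)  # my_rotator is hypothetical
if controller.start_threads(urls):
    controller.wait_for_threads()        # blocks until the threads stop or the timeout fires
    pages = controller.join_responses()  # parsed results, or full pages if no parser was given
    if controller.is_sentenced():
        print('Run ended by user abort, thread error or timeout')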
class IPReverse():
    def __init__(self):
        self.Threadcount = int(input("Threads?\n"))
        os.system("cls")
        self.ProxyLoc = input("Proxyfile?\n")
        os.system("cls")
        self.IPListLoc = input("IPList?\n")
        os.system("cls")
        choose = input("[1]HTTP/S\n[2]Socks4\n[3]Socks5\n")
        os.system("cls")
        if choose == "1":
            self.Proxypre = ""
        elif choose == "2":
            self.Proxypre = "socks4://"
        else:
            self.Proxypre = "socks5://"

        try:
            IP = open(self.IPListLoc, "r").readlines()
        except OSError:
            print("Invalid IPFile!")
            IP = []

        self.proxys = Queue()
        self.IPs = Queue()

        self.PrintList = []

        self.currentproxy = ""
        self.ProxyCount = 0

        self.Count = 0
        self.Checked = 0
        self.Hits = 0
        self.Invalid = 0
        self.Domains = 0
        self.WriteQueue = Queue()

        self.CPM = 0

        try:
            proxy = open(self.ProxyLoc, "r").readlines()
        except OSError:
            print("Invalid Proxyfile!")
            proxy = []

        for i in IP:
            self.IPs.put(i.strip())

        for i in proxy:
            self.proxys.put(self.Proxypre + i.strip())

        for i in range(0, self.Threadcount):
            threading.Thread(target=self.Thread).start()

        threading.Thread(target=self.CPMCounter).start()
        threading.Thread(target=self.Writer).start()
        threading.Thread(target=self.Printer).start()

    def CPMCounter(self):
        while True:
            old = self.Checked
            time.sleep(1)
            new = self.Checked
            self.CPM = int(new - old) * 60

    def Lookup(self, ip, proxy):
        try:
            resp = requests.get(
                "https://api.hackertarget.com/reverseiplookup/?q=" + ip,
                proxies={"https": proxy},
                timeout=3)
        except:
            return False
        return resp.text

    def Writer(self):
        while True:
            try:
                curip = self.WriteQueue.get(timeout=1)
            except:
                continue
            open("Output.txt", "a+").write(curip + "\n")

    def Printer(self):
        while True:
            ctypes.windll.kernel32.SetConsoleTitleW(
                "Reverse IP Lookup | Made by Nezuko | Proxys remaining: " +
                str(self.proxys.qsize()) + " | IPs remaining: " +
                str(self.IPs.qsize()) + " | Hits: " + str(self.Hits) +
                " | Found Domains: " + str(self.Domains) + " | CPM: " +
                str(self.CPM))
            cur = ""
            for i in self.PrintList:
                cur += i + "\n"
            print(cur)
            if self.ProxyCount >= 10:
                self.currentproxy = self.proxys.get()
                self.ProxyCount = 0
            time.sleep(0.1)
            os.system("cls")

    def Thread(self):
        while True:
            try:
                curip = self.IPs.get(timeout=1)
            except:
                continue
            if self.ProxyCount != 10:
                self.ProxyCount += 1
                resp = self.Lookup(curip, self.currentproxy)
            else:
                time.sleep(0.3)
                self.ProxyCount += 1
                resp = self.Lookup(curip, self.currentproxy)
            self.Checked += 1
            if resp is False or "429 Too Many Requests" in resp:
                self.IPs.put(curip)
                continue
            elif "No DNS A records found for " in resp or resp == "error check your search parameter":
                self.Invalid += 1
                #print(resp.text)
            else:
                #print(resp.text)
                self.Checked += 1
                self.Hits += 1

                domains = resp.split("\n")
                domains.pop(0)
                for m in domains:
                    self.Domains += 1
                    self.WriteQueue.put(m.strip())
                self.PrintList.append(colorama.Fore.GREEN +
                                      "Found Domain for " + curip + "!" +
                                      colorama.Style.RESET_ALL)
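IPReverse hands file output to a dedicated Writer thread that drains a queue. A minimal standalone sketch of that pattern (illustrative file name; a None sentinel is added here so the sketch can terminate, which the original loop does not do):

from queue import Queue
from threading import Thread

def writer(q, path):
    with open(path, 'a') as out:
        while True:
            line = q.get()
            if line is None:        # stop signal
                break
            out.write(line + '\n')

q = Queue()
t = Thread(target=writer, args=(q, 'Output.txt'))
t.start()
for domain in ('a.example.com', 'b.example.com'):
    q.put(domain)
q.put(None)
t.join()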
Example #43
0
class Brutedomain:
    def __init__(self, args):
        self.target_domain = args.domain
        self.check_env()
        self.cname_flag = args.cname
        if not (self.target_domain):
            print('usage: brutedns.py -h')
            sys.exit(1)

        self.level = args.level
        self.sub_dict = args.sub_file
        self.speed = args.speed
        self.default_dns = args.default_dns == "y"
        self.next_sub_dict = args.next_sub_file
        self.other_result = args.other_file

        self.timeout = 10
        self.resolver = dns.resolver.Resolver(configure=self.default_dns)
        self.resolver.lifetime = self.timeout
        self.resolver.timeout = self.timeout

        self.found_count = 0
        self.cmdline = ""
        self.queues = Queue()
        self.queue_sub = Queue()
        self.cdn_set = set()
        self.cname_set = set()
        self.white_filter_subdomain = set()
        self.cname_block_dict = dict()
        self.ip_block_dict = dict()
        self.ip_all_dict = dict()
        self.ip_flag_dict = dict()
        self.active_ip_dict = dict()
        self.ip_count_dict = dict()
        self.black_ip_dict = dict()
        self.ip_subdomain_dict = dict()

        self.set_next_sub = self.load_next_sub_dict()
        self.set_cdn = self.load_cdn()

        self.load_sub_dict_to_queue()
        self.extract_next_sub_log()

        self.segment_num = self.judge_speed(self.speed)

        if not self.default_dns:
            self.nameservers = self.load_nameservers()
            self.check_nameservers()

    def check_env(self):
        if (not os.path.exists(
                'result/{domain}'.format(domain=self.target_domain))):
            try:
                os.mkdir('result/{domain}'.format(domain=self.target_domain))
            except Exception as e:
                print(e)
                sys.exit(1)
        filename = 'result/{name}/{name}'.format(name=self.target_domain)
        new_filename = None
        if os.path.isfile(filename + ".csv"):
            new_filename = filename + "_" + str(
                os.stat(filename + ".csv").st_mtime).replace(".", "")
            os.rename(filename + ".csv", new_filename + ".csv")
        if os.path.isfile(filename + "_deal.csv"):
            if not new_filename:
                new_filename = filename + "_" + str(
                    os.stat(filename + "_deal.csv").st_mtime).replace(".", "")
            os.rename(filename + "_deal.csv", new_filename + "_deal.csv")

        if (platform.system() != "Windows"):
            try:
                self.cmdline = "\r\n"
                os.system("ulimit -n 65535")
            except Exception:
                pass
        else:
            self.cmdline = "\r"

    def load_cdn(self):
        cdn_set = set()
        with open('dict/cdn_servers.txt', 'r') as file_cdn:
            for cname in file_cdn:
                cdn_set.add(cname.strip())
        return cdn_set

    def load_next_sub_dict(self):
        next_sub_set = set()
        with open(self.next_sub_dict, 'r') as file_next_sub:
            for next_sub in file_next_sub:
                next_sub_set.add(next_sub.strip())
        return next_sub_set

    def load_sub_dict_to_queue(self):
        with open(self.sub_dict, 'r') as file_sub:
            for sub in file_sub:
                domain = "{sub}.{target_domain}".format(
                    sub=sub.strip(), target_domain=self.target_domain)
                self.queues.put(domain)

    def load_nameservers(self):
        nameserver_set = set()
        with open('dict/name_servers.txt', 'r') as nameservers:
            for nameserver in nameservers:
                nameserver_set.add(nameserver.strip())
        return nameserver_set

    def load_result_from_other(self):
        log_type = type(self.other_result)
        other_subdomain_list = list()
        if (log_type == str):
            try:
                subdomain_log = open(
                    '{target_domain}'.format(target_domain=self.other_result),
                    'r')
                other_result = [
                    subdomain.strip() for subdomain in subdomain_log
                ]
                subdomain_log.close()
            except Exception:
                print('subdomain log does not exist')
                sys.exit(1)
        elif (log_type == list):
            other_result = self.other_result
        else:
            other_result = []
        for subdomain in other_result:
            other_subdomain_list.append(subdomain.strip().strip("."))
        return other_subdomain_list

    def extract_next_sub_log(self):
        other_subdomain_list = self.load_result_from_other()
        for subdomain in other_subdomain_list:
            if (('.' + str(self.target_domain)) in subdomain):
                self.queues.put(subdomain)
            subname = subdomain.strip(".").replace(self.target_domain,
                                                   "").strip(".")
            if subname != "":
                sub_list = subname.split(".")
                for sub in sub_list:
                    self.set_next_sub.add(sub.strip())

    def check_nameservers(self):
        print("[+] Seraching fastest nameserver,it will take a few minutes")
        server_info = {}
        i = 0
        sys.stdout.write(self.cmdline + '[+] Searching nameserver process:' +
                         str(round(i * 100.00 / len(self.nameservers), 2)) +
                         "% ")
        sys.stdout.flush()
        for nameserver in self.nameservers:
            i = i + 1
            self.resolver.nameservers = [nameserver]
            self.resolver.lifetime = 3
            start = time.time()
            for _ in range(2):
                random_str = str(random.randint(1, 1000))
                domain_list = [
                    random_str + "testnamservspeed.com" for _ in range(200)
                ]
                coroutines = [
                    gevent.spawn(self.query_domain, l) for l in domain_list
                ]
                gevent.joinall(coroutines)
            end = time.time()
            cost = end - start
            server_info[nameserver] = cost
            sys.stdout.write('\r' + '[+] Searching nameserver process:' +
                             str(round(i * 100.00 /
                                       len(self.nameservers), 2)) + "% ")
            sys.stdout.flush()
        nameserver = sorted(server_info.items(),
                            key=lambda server_info: server_info[1])[0][0]
        print(self.cmdline)
        print("[+] Search completed,fastest nameserver: " + nameserver)
        self.ip_block_dict = dict()
        self.cname_block_dict = dict()
        self.resolver.lifetime = self.timeout
        self.resolver.nameservers = [nameserver]

    def check_cdn(self, cname_list, domain):
        for cname in cname_list:
            cname = cname.lower().rstrip(".")
            domain = domain.lower()
            for cdn in self.set_cdn:
                if (cdn in cname):
                    return True
            if (domain in cname):
                cname_list = cname.split(domain)
                if (cname_list[1] != ""):
                    self.cdn_set.add(cname_list[1].strip("."))
                    return True
            elif ('cdn' in cname or 'cache' in cname):
                self.cdn_set.add(cname)
                return True
            self.cname_set.add(cname)
        return False

    def get_type_id(self, name):
        return dns.rdatatype.from_text(name)

    def query_domain(self, domain):
        list_ip, list_cname = [], []
        try:
            record = self.resolver.query(domain)
            for A_CNAME in record.response.answer:
                for item in A_CNAME.items:
                    if item.rdtype == self.get_type_id('A'):
                        list_ip.append(str(item))
                        self.ip_block_dict[domain] = list_ip
                    elif (item.rdtype == self.get_type_id('CNAME')):
                        list_cname.append(str(item))
                        self.cname_block_dict[domain] = list_cname
        except dns.exception.Timeout:
            self.queues.put(domain)
        except Exception as e:
            pass

    def get_block(self):
        domain_list = list()
        if (self.queues.qsize() > self.segment_num):
            for _ in range(self.segment_num):
                domain_list.append(self.queues.get())
        else:
            for _ in range(self.queues.qsize()):
                domain_list.append(self.queues.get())
        return domain_list

    def get_black_subdomain(self):
        temp_list = list()
        temp_set = set()
        for subdomain_list in self.black_ip_dict.values():
            temp_list.extend(subdomain_list)
        black_subdomain = set(temp_list) - self.white_filter_subdomain
        for domain in black_subdomain:
            for next_sub in self.set_next_sub:
                subdomain = "{next}.{domain}".format(next=next_sub,
                                                     domain=domain)
                temp_set.add(subdomain)
        return temp_set

    def judge_speed(self, speed):
        if (speed == "low"):
            segment_num = config.low_segment_num
        elif (speed == "high"):
            segment_num = config.high_segment_num
        else:
            segment_num = config.medium_segment_num
        return segment_num

    def generate_sub(self):
        try:
            domain = self.queue_sub.get_nowait()
            for next_sub in self.set_next_sub:
                subdomain = "{next}.{domain}".format(next=next_sub.strip(),
                                                     domain=domain)
                self.queues.put_nowait(subdomain)
            return True
        except Exception as e:
            return False

    def deweighting_subdomain(self):
        temp_list = list()
        for subdomain, ip_list in self.ip_block_dict.items():
            ip_str = str(sorted(ip_list))
            if ip_str not in self.black_ip_dict:
                if ip_str in self.ip_count_dict:
                    self.ip_subdomain_dict[ip_str].append(subdomain)
                    if (self.ip_count_dict[ip_str] > config.ip_max_count):
                        temp_list.append(subdomain)
                    else:
                        self.ip_count_dict[
                            ip_str] = self.ip_count_dict[ip_str] + 1
                else:
                    self.ip_subdomain_dict[ip_str] = [subdomain]
                    self.ip_count_dict[ip_str] = 1

                for ip in ip_list:
                    if ip in config.waiting_fliter_ip:
                        temp_list.append(subdomain)
                    if (IP(ip).iptype() != 'PUBLIC'):
                        temp_list.append(subdomain)
            else:
                temp_list.append(subdomain)

        for ip_str, count in self.ip_count_dict.items():
            if (count > 10):
                i = 0
                subdomain_list = self.ip_subdomain_dict[ip_str]
                min_subdomain = reduce(
                    lambda x, y: x if len(x) < len(y) else y, subdomain_list)
                for subdomain in subdomain_list:
                    if ("." + min_subdomain in subdomain):
                        i = i + 1
                    if (i > 10):
                        self.black_ip_dict[ip_str] = subdomain_list
                        break

        for subdomain_list in self.black_ip_dict.values():
            temp_list.extend(subdomain_list)

        for subdomain in temp_list:
            try:
                del self.ip_all_dict[subdomain]
            except Exception:
                pass
            try:
                del self.cname_block_dict[subdomain]
                self.white_filter_subdomain.add(subdomain)
            except Exception:
                pass
            try:
                del self.ip_block_dict[subdomain]
            except Exception:
                pass

        self.found_count = len(self.ip_all_dict)
        self.ip_all_dict.update(self.ip_block_dict)

        for subdomain, ip_list in self.ip_block_dict.items():
            if (subdomain.count(".") < self.level):
                self.queue_sub.put(subdomain)
        self.ip_block_dict.clear()

    def handle_data(self):
        for subdomain, cname_list in self.cname_block_dict.items():
            if (self.check_cdn(cname_list, self.target_domain)):
                cname_list.append("Yes")
            else:
                cname_list.append("No")
            self.cname_block_dict[subdomain] = cname_list
        for subdomain, ip_list in self.ip_all_dict.items():
            for ip in ip_list:
                iptype = IP(ip).iptype()
                if (iptype != 'PUBLIC'):
                    self.ip_all_dict[subdomain] = "{iptype}({ip})".format(
                        iptype=iptype, ip=ip)
                else:
                    try:
                        key_yes = self.cname_block_dict[subdomain][-1]
                    except KeyError:
                        key_yes = "No"
                    if (key_yes == "No"):
                        CIP = (IP(ip).make_net("255.255.255.0"))
                        if CIP in self.ip_flag_dict:
                            self.ip_flag_dict[CIP] = self.ip_flag_dict[CIP] + 1
                        else:
                            self.ip_flag_dict[CIP] = 1

                        if CIP in self.active_ip_dict:
                            active_ip_list = self.active_ip_dict[CIP]
                            if (ip not in active_ip_list):
                                active_ip_list.append(ip)
                        else:
                            active_ip_list = [ip]
                        self.active_ip_dict[CIP] = active_ip_list

    def raw_write_disk(self):
        if (not os.path.exists(
                'result/{domain}'.format(domain=self.target_domain))):
            os.mkdir('result/{domain}'.format(domain=self.target_domain))
        with open('result/{name}/{name}.csv'.format(name=self.target_domain),
                  'a') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow(['DOMAIN', 'CDN', "CNAME", 'IP'])
            for subdomain, ip_list in self.ip_all_dict.items():
                try:
                    # handle_data() appended the CDN flag as the last element
                    cname_list = self.cname_block_dict[subdomain]
                    flag = cname_list.pop()
                except Exception:
                    flag = "No"
                    cname_list = "Null"
                writer.writerow([subdomain, flag, cname_list, ip_list])
        self.ip_all_dict.clear()
        self.cname_block_dict.clear()

    def deal_write_disk(self):
        ip_flags = sorted(self.ip_flag_dict.items(),
                          key=lambda d: d[1],
                          reverse=True)
        with open(
                'result/{name}/{name}_deal.csv'.format(
                    name=self.target_domain), 'w') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow(['IP', 'frequency', 'active'])
            for ip_frequency in ip_flags:
                writer.writerow([
                    ip_frequency[0], ip_frequency[1],
                    self.active_ip_dict[ip_frequency[0]]
                ])

    def collect_cname(self):
        with open('result/cname.txt', 'a') as txt:
            for cname in self.cname_set:
                flag = False
                for cdn in self.set_cdn:
                    if (cdn in cname or self.target_domain in cname):
                        flag = True
                if (flag == False):
                    txt.write('{cname}'.format(cname=cname.strip()) +
                              self.cmdline)
        with open('result/cdn.txt', 'a') as txt:
            for cdn in self.cdn_set:
                txt.write('{cname}'.format(cname=cdn) + self.cmdline)

    def cmd_print(self, wait_size, start, end, i):
        scanned = self.segment_num * i
        cost = end - start
        sys.stdout.write(
            "\r" +
            "[+] Bruting subdomain process domain: {domain} |scanned: {scanned}|found: {found_count} |speed:{velocity} |spend: {spend} min "
            .format(domain=self.target_domain,
                    scanned=scanned,
                    found_count=self.found_count,
                    velocity=round(scanned / cost, 1),
                    spend=round(cost / 60, 1)))
        sys.stdout.flush()

    def run(self):
        start = time.time()
        print("[+] Begin to brute domain")
        i = 0
        while not self.queues.empty() or not self.queue_sub.empty():
            i = i + 1
            domain_list = set(self.get_block()) - self.get_black_subdomain()
            coroutines = [
                gevent.spawn(self.query_domain, l) for l in domain_list
            ]
            try:
                gevent.joinall(coroutines)
            except KeyboardInterrupt:
                print('user stop')
                sys.exit(1)

            self.deweighting_subdomain()
            self.cmd_print(self.queues.qsize(), start, time.time(), i)

            if (self.queues.qsize() < 30000 and self.queue_sub.qsize() > 0):
                while (self.queues.qsize() < 200000):
                    if not self.generate_sub():
                        break

        self.handle_data()
        self.raw_write_disk()
        self.deal_write_disk()
        self.collect_cname()
        print(self.cmdline)
        print("[+] Brute over")
class Round_Button(tk.Label):

    def __init__(self, top, text, size, static_colour, static_t_colour, transformation_colour, transformation_t_colour, background:str='#FFFFFF', static_outline=None, trans_outline=None):

        '''

        :param top: Top level / root. The window in which the button is going to be placed. [Tkinter Object]
        :param text: Text that is placed on the button. [String]
        :param size: Multiplier for the size. [Integer]
        :param static_colour: Colour for the button when static. [Tuple,(R,G,B)]
        :param static_t_colour: Colour for the text when the button is static. [Tuple,(R,G,B)]
        :param transformation_colour: Colour for the button when cursor is over it. [Tuple,(R,G,B)]
        :param transformation_t_colour: Colour for the text when the cursor is over the button. [Tuple,(R,G,B)]
        :param background: Sets the background colour of the Button so it can blend with the window's background. [String, hex colour] Defaults to white ('#FFFFFF').
        :param static_outline: outline colour of static image. [Tuple, (RGB)] Defaults to static_colour value.
        :param trans_outline: outline colour of transformed image. [Tuple, (RGB)] Defaults to transformation_colour value.

        '''

        ## Initialisation
        ## ==============

        tk.Label.__init__(self, top)  # Inherits the features of a label
        self.sc = static_colour
        self.tc = transformation_colour
        self.tsc = static_t_colour
        self.ttc = transformation_t_colour
        self.multi = size
        self.resoltuion = (int(35*size), int(10*size)) # 3.5 : 1 (W : H)
        self.text = text
        self.change_to_trans = False
        self.change_to_static = False

        self.static_outline = static_outline
        self.trans_outline = trans_outline
        if static_outline is None:
            self.static_outline = static_colour

        if trans_outline is None:
            self.trans_outline = transformation_colour


        self.create_custom_image() #Create static and transformed buttons
        self.create_lower_button() #Creates Lower Button
        self.connect_function()
        self.configure(image=self.Images[9]) #Inserts static button images
        self.configure(background=background)
        self.bind("<Enter>", self.on_enter) #Hover on capabilities
        self.bind("<Leave>", self.on_leave) #Hover off capabilities
        self.queue = Queue()
        self.Animator = Thread(target=self.Manage_Animation)
        self.Animator.start()


    def create_custom_image(self):

        decrement = -1
        while True:
            # < decrement > : Used for lowering the font size so that the text doesn't go off the screen.
            decrement += 1
            font = ImageFont.truetype("Assets/GentiumBasic-Bold.ttf", int(5.5 * self.multi) - decrement, encoding="unic")
            coords, Lines, line_height = self.draw_multiple_line_text(self.text, font, int(36 * self.multi), int(2 * self.multi), 12)
            if coords[-1][1] + line_height + 5 > self.resoltuion[1]:
                continue
            break

        self.images = [Image.new('RGBA', (self.resoltuion)) for i in range (10)]

        # Initialising the ImageDraw.Draw objects (one per image)
        self.image_drawer = [ImageDraw.Draw(self.images[i]) for i in range (10)]
        self.image_colours = [[self.tc[i] + ((self.sc[i]-self.tc[i])//10)*x for i in range (3)] for x in range (10)]
        self.text_colours = [[self.ttc[i] + ((self.tsc[i] - self.ttc[i]) // 10) * x for i in range(3)] for x in range(10)]
        self.outline_colours = [[self.trans_outline[i] + ((self.static_outline[i] - self.trans_outline[i]) // 10) * x for i in range(3)] for x in range(10)]
        for i in range(10):

            # Puts the colours in a tuple for use.
            colour = (self.image_colours[i][0],self.image_colours[i][1],self.image_colours[i][2])
            textcolour = (self.text_colours[i][0], self.text_colours[i][1], self.text_colours[i][2])
            outline = (self.outline_colours[i][0], self.outline_colours[i][1], self.outline_colours[i][2])

            # Creates the base for both images (Rectangles)

            self.image_drawer[i].rectangle((int(5.5 * self.multi),0, self.resoltuion[0] - int(5.5 * self.multi), self.resoltuion[1]-1), outline=outline, width =2, fill=colour)

            # Create rectangles to remove the unwanted areas of colour, and add ellipses to give a round effect.
            # One on each side.

            self.image_drawer[i].rectangle((self.resoltuion[0] - int(5.5 * self.multi), 0, self.resoltuion[0], self.resoltuion[1]-2),fill=(0, 0, 0, 0))
            self.image_drawer[i].ellipse((self.resoltuion[0] - int(10 * self.multi), 0, self.resoltuion[0]-1, self.resoltuion[1]-2),outline=outline, width=2, fill=colour)

            self.image_drawer[i].rectangle((0, 0, int(5.5 * self.multi), int(10 * self.multi)-2), fill=(0, 0, 0, 0))
            self.image_drawer[i].ellipse((0, 0, int(10 * self.multi), int(10 * self.multi)-2), outline=outline, width=2 ,fill=(colour))

            self.image_drawer[i].rectangle((int(5.5 * self.multi), 2, self.resoltuion[0] - int(5.5 * self.multi), self.resoltuion[1]-3), fill=colour)

            for x in range (len(coords)):
                self.image_drawer[i].text(coords[x], Lines[x], fill=textcolour, font=font, align='center')

        self.Images = [ImageTk.PhotoImage(self.images[i]) for i in range (10)]

    def create_lower_button(self):
        multi_d = 0.25
        multi = self.multi  - multi_d
        resoltuion = (int(35 * multi), int(10*multi))
        decrement = -1
        while True:
            # < decrement > : Used for lowering the font size so that the text doesn't go off the screen.
            decrement += 1
            font = ImageFont.truetype("Assets/GentiumBasic-Bold.ttf", int(5.5 * multi) - decrement,encoding="unic")
            coords, Lines, line_height = self.draw_multiple_line_text(self.text, font, int(36 * multi),int(2 * multi), 12)
            if coords[-1][1] + line_height + 5 > self.resoltuion[1]-(10*multi_d):
                continue
            break


        self.lower_button = Image.new('RGBA', (resoltuion))

        # Initialising the ImageDraw.Draw object
        self.lower_drawer = ImageDraw.Draw(self.lower_button)

        colour = (self.image_colours[0][0], self.image_colours[0][1], self.image_colours[0][2])
        textcolour = (self.text_colours[0][0], self.text_colours[0][1], self.text_colours[0][2])
        outline = (self.outline_colours[0][0], self.outline_colours[0][1], self.outline_colours[0][2])

        # Creates the base for the button (rectangle)

        self.lower_drawer.rectangle((0, 0, resoltuion[0], resoltuion[1]-1), outline=outline, width=2,  fill=colour)

        # Create rectangles to remove the unwanted areas of colour, and add ellipses to give a round effect.
        # One on each side.

        # Right side
        self.lower_drawer.rectangle((resoltuion[0] - int(5.5*multi), 0, resoltuion[0], resoltuion[1]),fill=(0, 0, 0, 0))
        self.lower_drawer.ellipse((resoltuion[0] - int(10*multi), 0, resoltuion[0], resoltuion[1]), outline=outline, width=2, fill=colour)

        # Left side
        self.lower_drawer.rectangle((0, 0, int(5.5 * multi), int(10 * multi)), fill=(0, 0, 0, 0))
        self.lower_drawer.ellipse((0, 0, int(10 * multi), int(10 * multi)), outline=outline, width=2, fill=(colour))

        self.lower_drawer.rectangle((int(5.5 * multi), 2, resoltuion[0] - int(5.5*multi), resoltuion[1]-3), fill=colour)

        for x in range(len(coords)):
            self.lower_drawer.text(coords[x], Lines[x], fill=textcolour, font=font, align='center')

        delta_x = (self.resoltuion[0] - resoltuion[0])//2
        delta_y = (self.resoltuion[1] - resoltuion[1])//2


        #Perfects the size for pasting.
        self.lower_button = self.lower_button.resize(size=(self.resoltuion[0] - delta_x*2, self.resoltuion[1] - delta_y*2))

        #Pasting the image on top of a transparent image with the original resolution.
        self.Button = Image.new('RGBA', (self.resoltuion))
        self.Button.paste(self.lower_button, (delta_x, delta_y, self.resoltuion[0] - delta_x, self.resoltuion[1] - delta_y), self.lower_button)

        self.lower_button = ImageTk.PhotoImage(self.Button)



    def draw_multiple_line_text(self, text, font, text_start_width, text_start_height, Line_Width):
        ## Used for creating multi-line text. Splits the text across multiple lines if the text crosses the line width.

        y_text = text_start_height
        x_text = text_start_width
        lines = textwrap.wrap(text, width=int(Line_Width))
        Coords = []
        Lines = []
        line_height = 0
        for line in lines:
            line_width, line_height = font.getsize(line)
            coords = [(x_text - line_width) / 2, y_text]
            y_text += line_height
            Coords.append(coords)
            Lines.append(line)
        return Coords, Lines, line_height

    ## Animation Effect.
    ## Hovering.

    def on_enter(self,*args):
        #switches images to the transformed button.
        self.Q_Dump()
        self.queue.put('E')

    def Q_Dump(self):
        for i in range (self.queue.qsize()):
            self.queue.get_nowait()

    def on_leave(self,*args):
        #switches back to static image.
        self.Q_Dump()
        self.queue.put('L')

    def Manage_Animation(self):
        while True:
            Factor = self.queue.get()
            if Factor == 'E':
                self.change_sc()
            elif Factor == "L":
                self.change_tsc()


    def change_sc(self, si:int=9):
        self.change_to_static = True
        for i in range (si,0,-1):
            if self.change_to_trans == True:
                self.change_to_static = False
                self.change_tsc(i)
                break
            sleep(0.01)
            self.configure(image=self.Images[i])

        if self.change_to_static:
            self.change_to_static = False

    def change_tsc(self, si:int=0):

        self.change_to_trans = True
        for i in range (si, 10):
            if self.change_to_static == True:
                self.change_to_trans = False
                self.change_sc(i)
                break
            sleep(0.01)
            self.configure(image=self.Images[i])

        if self.change_to_trans:
            self.change_to_trans = False


    def connect_function(self, function=lambda:None):
        #Binds the button to a function.

        def connector(*args):
            self.configure(image=self.lower_button)
            function()

        def disconnector(*args):
            self.configure(image=self.Images[0])

        self.bind("<ButtonPress-1>", connector)
        self.bind("<ButtonRelease-1>", disconnector)
class EventManager(MetricsReporter):

    def __init__(self, event_iterable, event_handlers, event_timeout=DEFAULT_EVENT_TIMEOUT_SECS):
        self.__reg = None
        self.__tags = None
        self.__stopped = False
        self.__q = Queue()

        self.__events = event_iterable
        self.__event_handlers = event_handlers
        self.__event_timeout = event_timeout

        self.__processed_count = 0

        self.__started = False
        self.__started_lock = Lock()

        self.__processing_thread = Thread(target=self.__process_events)
        self.__pulling_thread = Thread(target=self.__pull_events)
        self.last_successful_event_epoch_s = 0

        config_manager = get_config_manager()

        rebalance_frequency = config_manager.get_float(REBALANCE_FREQUENCY_KEY, DEFAULT_REBALANCE_FREQUENCY)
        if rebalance_frequency > 0:
            schedule.every(rebalance_frequency).seconds.do(self.__rebalance)

        reconcile_frequency = config_manager.get_float(RECONCILE_FREQUENCY_KEY, DEFAULT_RECONCILE_FREQUENCY)
        if reconcile_frequency > 0:
            schedule.every(reconcile_frequency).seconds.do(self.__reconcile)

        oversubscribe_frequency = config_manager.get_float(OVERSUBSCRIBE_FREQUENCY_KEY,
                                                           DEFAULT_OVERSUBSCRIBE_FREQUENCY)
        if oversubscribe_frequency > 0:
            schedule.every(oversubscribe_frequency).seconds.do(self.__oversubscribe)

    def join(self):
        self.__pulling_thread.join()
        self.__processing_thread.join()

    def stop_processing_events(self):
        self.__stopped = True
        self.__events.close()
        self.join()

    def start_processing_events(self):
        with self.__started_lock:
            if self.__started:
                return

            self.__processing_thread.start()
            self.__pulling_thread.start()
            self.__started = True

    def get_queue_depth(self):
        return self.__q.qsize()

    def get_processed_count(self):
        return self.__processed_count

    def __rebalance(self):
        self.__put_event(REBALANCE_EVENT)

    def __reconcile(self):
        self.__put_event(RECONCILE_EVENT)

    def __oversubscribe(self):
        self.__put_event(OVERSUBSCRIBE_EVENT)

    def __pull_events(self):
        for event in self.__events:
            self.__put_event(event)

    def __put_event(self, event):
        event = json.loads(event.decode("utf-8"))
        if event[ACTION] in HANDLED_ACTIONS:
            log.info("Enqueuing event: {}, queue depth: {}".format(event[ACTION], self.get_queue_depth()))
            event[ENQUEUE_TIME_KEY] = time.time()
            self.__q.put(event)
            if self.__reg is not None:
                self.__reg.counter(ENQUEUED_COUNT_KEY, self.__tags).increment()
                self.__reg.counter(self.__get_enqueued_metric_name(event), self.__tags).increment()

    def __process_events(self):
        while not self.__stopped:
            try:
                event = self.__q.get(timeout=self.__event_timeout)
                dequeue_time = time.time()
                log.info("Dequeued event: {}, queue depth: {}".format(event[ACTION], self.get_queue_depth()))
                if self.__reg is not None:
                    self.__reg.counter(DEQUEUED_COUNT_KEY, self.__tags).increment()
                    self.__reg.counter(self.__get_dequeued_metric_name(event), self.__tags).increment()
                    self.__reg.distribution_summary(QUEUE_LATENCY_KEY, self.__tags).record(dequeue_time - event[ENQUEUE_TIME_KEY])
            except Empty:
                log.debug("Timed out waiting for event on queue.")
                continue

            for event_handler in self.__event_handlers:
                try:
                    log.info("{} handling event: {}".format(type(event_handler).__name__, event[ACTION]))
                    event_handler.handle(event)
                    self.__report_succeeded_event(event_handler)
                except Exception:
                    log.exception("Event handler: '{}' failed to handle event: '{}'".format(
                        type(event_handler).__name__, event))
                    self.__report_failed_event(event_handler)

            self.__q.task_done()
            if self.__reg is not None:
                self.__reg.counter(EVENT_PROCESSED_KEY, self.__tags).increment()
                self.__reg.gauge(QUEUE_DEPTH_KEY, self.__tags).set(self.get_queue_depth())
            self.__processed_count += 1

    def __report_succeeded_event(self, event_handler: EventHandler):
        if self.__reg is not None:
            self.__reg.counter(self.__get_event_succeeded_metric_name(event_handler), self.__tags).increment()
            self.__reg.counter(EVENT_SUCCEEDED_KEY, self.__tags).increment()
            self.last_successful_event_epoch_s = datetime.utcnow().timestamp()

    def __report_failed_event(self, event_handler: EventHandler):
        if self.__reg is not None:
            self.__reg.counter(self.__get_event_failed_metric_name(event_handler), self.__tags).increment()
            self.__reg.counter(EVENT_FAILED_KEY, self.__tags).increment()

    @staticmethod
    def __get_event_succeeded_metric_name(event_handler: EventHandler) -> str:
        return "titus-isolate.{}.eventSucceeded".format(type(event_handler).__name__)

    @staticmethod
    def __get_event_failed_metric_name(event_handler: EventHandler) -> str:
        return "titus-isolate.{}.eventFailed".format(type(event_handler).__name__)

    @staticmethod
    def __get_enqueued_metric_name(event) -> str:
        return "titus-isolate.{}.eventEnqueued".format(event[ACTION])

    @staticmethod
    def __get_dequeued_metric_name(event) -> str:
        return "titus-isolate.{}.eventDequeued".format(event[ACTION])

    def set_registry(self, registry, tags):
        self.__reg = registry
        self.__tags = tags

    def report_metrics(self, tags):
        pass
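EventManager splits work between a pulling thread that enqueues events and a processing thread that polls the queue with a timeout so it can notice the stop flag. A minimal, self-contained sketch of that split (illustrative class and names, not the project's actual constants or handlers):

import time
from queue import Queue, Empty
from threading import Thread

class MiniEventManager:
    def __init__(self, events, handler, timeout=1):
        self.q = Queue()
        self.stopped = False
        self.events = events
        self.handler = handler
        self.timeout = timeout
        self.puller = Thread(target=self.pull)
        self.processor = Thread(target=self.process)

    def pull(self):
        for event in self.events:          # enqueue everything the source yields
            self.q.put(event)

    def process(self):
        while not self.stopped:
            try:
                event = self.q.get(timeout=self.timeout)  # wake up periodically
            except Empty:
                continue                    # re-check the stop flag
            self.handler(event)
            self.q.task_done()

    def start(self):
        self.processor.start()
        self.puller.start()

    def stop(self):
        self.stopped = True
        self.puller.join()
        self.processor.join()

m = MiniEventManager(events=['rebalance', 'reconcile'], handler=print)
m.start()
time.sleep(2)
m.stop()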
Example #46
0
class QA_Thread(threading.Thread):
    '''
    A producer/consumer model that spawns new threads as needed.
    Internally it keeps a queue that stores QA_Task objects; the callback matters,
    since a function can be bound for execution when a task is assigned.
    QA_Engine inherits from this class.

    It carries its own Queue and exposes four queue-related methods:
    self.put / self.put_nowait / self.get / self.get_nowait.

    If you override the run method:
    you have to handle the items in the queue yourself / simply implement your own logic.
    '''
    def __init__(self, queue=None, name=None, daemon=False):
        threading.Thread.__init__(self)
        self.queue = Queue() if queue is None else queue
        self.thread_stop = False
        self.__flag = threading.Event()  # flag used to pause the thread
        self.__flag.set()  # set to True
        self.__running = threading.Event()  # flag used to stop the thread
        self.__running.set()  # set running to True
        self.name = QA_util_random_with_topic(topic='QA_Thread',
                                              lens=3) if name is None else name
        self.idle = False
        self.daemon = daemon

    def __repr__(self):
        return '<QA_Thread: {}  id={} ident {}>'.format(
            self.name, id(self), self.ident)

    def run(self):
        while self.__running.isSet():
            self.__flag.wait()
            while not self.thread_stop:
                'This is a blocking queue, to avoid missing any messages'
                try:
                    if self.queue.empty() is False:
                        _task = self.queue.get()  # 接收消息
                        #print(_task.worker, self.name)
                        assert isinstance(_task, QA_Task)
                        if _task.worker is not None:
                            _task.do()
                            self.queue.task_done()  # one task done
                        else:
                            pass
                    else:
                        self.idle = True
                        # On a MacBook the fans spin wildly; adding a sleep makes CPU usage drop
                        # time.sleep(0.01)
                except Exception as e:
                    if isinstance(e, ValueError):
                        pass
                    else:
                        raise e

    def pause(self):
        self.__flag.clear()

    def resume(self):
        self.__flag.set()  # set to True so the thread stops blocking

    def stop(self):
        # self.__flag.set()       # resume the thread from the paused state, if it was paused
        self.__running.clear()
        self.thread_stop = True  # set to True

    def __start(self):
        self.queue.start()

    def put(self, task):
        self.queue.put(task)

    def put_nowait(self, task):
        self.queue.put_nowait(task)

    def get(self):
        return self.queue.get()

    def get_nowait(self):
        return self.queue.get_nowait()

    def qsize(self):
        return self.queue.qsize()
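QA_Thread's pause/resume/stop methods are built on two threading.Event flags. A minimal standalone sketch of that mechanism, without QA_Task (names are illustrative):

import threading
import time

class PausableWorker(threading.Thread):
    def __init__(self):
        super().__init__(daemon=True)
        self.flag = threading.Event()      # cleared -> paused
        self.flag.set()
        self.running = threading.Event()   # cleared -> stopped
        self.running.set()

    def run(self):
        while self.running.is_set():
            self.flag.wait()               # blocks while paused
            print('working...')
            time.sleep(0.2)

    def pause(self):
        self.flag.clear()

    def resume(self):
        self.flag.set()

    def stop(self):
        self.resume()                      # wake the thread if it is paused
        self.running.clear()

w = PausableWorker()
w.start()
time.sleep(0.5)
w.pause()
time.sleep(0.5)
w.resume()
time.sleep(0.5)
w.stop()
w.join()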
Example #47
0
# first line: n nodes in total, m lines describing non-leaf nodes
n, m = map(int, input().split())
tree = {}
for _ in range(m):
    # each following line: parent id, number of children, then the children ids
    father, _, *son = map(int, input().split())
    tree[father] = son
from queue import Queue
# level-order (BFS) traversal from the root (node 1), counting leaves per level
q = Queue()
q.put(1)
level = []
while q.qsize():
    cnt = 0
    size = q.qsize()
    while size:
        size -= 1
        temp = q.get()
        if tree.get(temp) is None:
            cnt += 1  # node has no children recorded, so it is a leaf
        else:
            for each in tree[temp]:
                q.put(each)
    level.append(cnt)
# print the per-level leaf counts separated by single spaces (no trailing space)
for i in range(len(level)):
    print("%d" % level[i], end='')
    if i != len(level) - 1:
        print(" ", end='')
Example #48
0
class BaseServer(TcBase):
    def __init__(self, interface="0.0.0.0", port=9998):
        super().__init__()
        self.port = port
        self.interface = interface
        self.sock = None

        self.clients = {}
        self.send_q = Queue()

        self.running = False
        self.server_thread = None
        self.ready_flag = True

        self.newClientEvent = TcEvent()
        self.clientRemovedEvent = TcEvent()
        self.clientsClearedEvent = TcEvent()

    def start(self, interface="0.0.0.0", port=9998):
        self.interface = interface
        self.port = port
        self.server_thread = Thread(target=self.server)
        self.running = True
        self.server_thread.start()

    def stop(self):
        self.running = False
        self.logger.info("Server stopped")

    def server(self):
        try:
            self.sock = socket.socket(
                socket.AF_INET,  # Internet
                socket.SOCK_DGRAM)  # UDP
            self.sock.setblocking(False)
            self.sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            self.sock.bind((self.interface, self.port))
        except socket.error as e:
            self.logger.error(f"Error creating socket: {e}")
            self.stop()
            return

        self.logger.info(
            f"UDP Server started.  Listening on port: {self.port}")

        # TODO: Make more efficient/elegant
        while self.running:
            self.remove_old_clients()
            # Send messages from send queue
            self.send_msgs()
            readable, writeable, errors = select.select([self.sock], [], [], 0)
            # print(readable)
            for sock in readable:
                try:
                    d = sock.recvfrom(128)
                    self.handle_msg(d)
                except socket.error as e:
                    self.logger.error(e)
            time.sleep(0.05)

        self.remove_clients()
        self.sock.close()
        self.logger.debug(f"Socket closed")

    def remove_old_clients(self):
        # Identify clients that haven't sent a heartbeat for a while
        now = time.time()
        remove = set()
        for c, hb_ts in self.clients.items():
            if now - hb_ts > 20:
                remove.add(c)

        for c in remove:
            self.remove_client(c)

    def add_client(self, client):
        self.logger.info(f"New client connection from: {client}")
        self.clients[client] = time.time()
        self.newClientEvent.emit(client)

    def remove_client(self, client):
        self.logger.info(f"Removing client connection: {client}")
        self.clients.pop(client)
        if not self.clients:
            self.logger.info(
                "No clients connected.  Waiting for new client connections...")
        self.clientRemovedEvent.emit(client)

    def remove_clients(self):
        self.clients = {}
        self.logger.debug("Client list cleared")
        self.clientsClearedEvent.emit()

    def handle_heartbeat(self, client):
        now = time.time()
        if client not in self.clients:
            self.add_client(client)
        self.clients[client] = now
        # self.logger.debug(f"Heartbeat from: {client}")
        # self.heartbeat_reply(client)

    def heartbeat_reply(self, client):
        self.add_to_send_queue("HB REPLY", client)

    def add_to_send_queue(self, msg, client):
        msg_pkg = (msg, (client[0], client[1]))
        self.send_q.put(msg_pkg)

    def send_msgs(self):
        while self.send_q.qsize() > 0:
            msg_pkg = self.send_q.get()
            self.send_msg(msg_pkg[0], msg_pkg[1])

    def send_msg(self, msg: str, client):
        try:
            self.sock.sendto(msg.encode('utf-8'), client)
            self.logger.debug(f"MSG SENT: {msg} {client}")
        except socket.error as e:
            self.logger.error(
                f"There was a socket error while attempting to send a message: {e}"
            )

    def handle_msg(self, d):
        msg = d[0].decode()
        client = d[1]
        self.logger.debug(f"MSG RECV: {client} {msg}")

        if msg == "/sync/add" or client not in self.clients:
            self.add_client(client)
        elif msg == "/sync/remove":
            self.remove_client(client)
        elif msg == "/heartbeat":
            self.handle_heartbeat(client)
        else:
            self.logger.error(f"UNHANDLED MESSAGE: {client} {msg}")
Example #49
0
File: ib.py Project: bohblue2/aat
class InteractiveBrokersExchange(Exchange):
    '''Interactive Brokers Exchange'''
    def __init__(self,
                 trading_type,
                 verbose,
                 account='',
                 delayed=True,
                 **kwargs):
        self._trading_type = trading_type
        self._verbose = verbose

        if self._trading_type == TradingType.LIVE:
            super().__init__(ExchangeType('interactivebrokers'))
        else:
            super().__init__(ExchangeType('interactivebrokerspaper'))

        # map order.id to order
        self._orders = {}

        # IB TWS gateway
        self._order_event_queue = Queue()
        self._market_data_queue = Queue()
        self._contract_lookup_queue = Queue()
        self._account_position_queue = Queue()
        self._api = _API(account, self.exchange(), delayed,
                         self._order_event_queue, self._market_data_queue,
                         self._contract_lookup_queue,
                         self._account_position_queue)

    # *************** #
    # General methods #
    # *************** #
    async def instruments(self):
        '''get list of available instruments'''
        return []

    async def connect(self):
        '''connect to exchange. should be asynchronous.

        For OrderEntry-only, can just return None
        '''
        if self._trading_type == TradingType.LIVE:
            print('*' * 100)
            print('*' * 100)
            print('WARNING: LIVE TRADING')
            print('*' * 100)
            print('*' * 100)
            self._api.connect('127.0.0.1', 7496, randint(0, 10000))
            self._api_thread = threading.Thread(target=self._api.run,
                                                daemon=True)
            self._api_thread.start()

        else:
            self._api.connect('127.0.0.1', 7497, randint(0, 10000))
            self._api_thread = threading.Thread(target=self._api.run,
                                                daemon=True)
            self._api_thread.start()

        while self._api.nextOrderId is None:
            print('waiting for IB connect...')
            await asyncio.sleep(1)

        print('IB connected!')

    async def lookup(self, instrument):
        self._api.reqContractDetails(_constructContract(instrument))
        i = 0
        while i < 5:
            if self._contract_lookup_queue.qsize() > 0:
                ret = []
                while self._contract_lookup_queue.qsize() > 0:
                    contract_details = self._contract_lookup_queue.get()
                    ret.append(_constructInstrument(contract_details.contract))
                return ret
            else:
                await asyncio.sleep(1)
                i += 1

    # ******************* #
    # Market Data Methods #
    # ******************* #
    async def subscribe(self, instrument):
        self._api.subscribeMarketData(instrument)

    async def tick(self):
        '''return data from exchange'''
        while True:
            # clear order events
            while self._order_event_queue.qsize() > 0:
                order_data = self._order_event_queue.get()
                status = order_data['status']
                order = self._orders[order_data['orderId']]

                if status in ('ApiPending', 'PendingSubmit', 'PendingCancel',
                              'PreSubmitted', 'ApiCancelled', 'Inactive'):
                    # ignore
                    continue

                elif status in ('Submitted', ):
                    # TODO more granular order events api?
                    # ignore
                    pass

                elif status in ('Cancelled', ):
                    e = Event(type=EventType.CANCELED, target=order)
                    yield e

                elif status in ('Filled', ):
                    # set filled
                    order.filled = order_data['filled']

                    # create trade object
                    t = Trade(volume=order_data['filled'],
                              price=order_data['avgFillPrice'],
                              maker_orders=[],
                              taker_order=order)

                    # set my order
                    t.my_order = order

                    e = Event(type=EventType.TRADE, target=t)
                    yield e

            # clear market data events
            while self._market_data_queue.qsize() > 0:
                market_data = self._market_data_queue.get()
                instrument = market_data['instrument']
                price = market_data['price']
                o = Order(volume=1,
                          price=price,
                          side=Side.BUY,
                          instrument=instrument,
                          exchange=self.exchange())
                t = Trade(volume=1,
                          price=price,
                          taker_order=o,
                          maker_orders=[])
                yield Event(type=EventType.TRADE, target=t)

            await asyncio.sleep(0)

        # clear market data events
        # TODO

    # ******************* #
    # Order Entry Methods #
    # ******************* #
    async def accounts(self):
        '''get accounts from source'''
        self._api.reqPositions()
        i = 0
        while i < 5:
            if self._account_position_queue.qsize() > 0:
                return self._account_position_queue.get()
            else:
                await asyncio.sleep(1)
                i += 1

    async def newOrder(self, order):
        '''submit a new order to the exchange. should set the given order's `id` field to exchange-assigned id

        For MarketData-only, can just return None
        '''

        # construct IB contract and order
        ibcontract, iborder = _constructContractAndOrder(order)

        # send to IB
        id = self._api.placeOrder(ibcontract, iborder)

        # update order id
        order.id = id
        self._orders[order.id] = order

    async def cancelOrder(self, order: Order):
        '''cancel a previously submitted order to the exchange.

        For MarketData-only, can just return None
        '''
        self._api.cancelOrder(order.id)
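
The exchange above bridges the IB API callback thread and asyncio by pushing events into plain queue.Queue objects and draining them inside tick() with non-blocking polls. A self-contained sketch of that thread-to-coroutine bridge follows; the producer, the None sentinel and the event payloads are illustrative, not part of aat:

import asyncio
import time
from queue import Queue
from threading import Thread

def producer(q: Queue):
    # stands in for the IB API thread pushing order/market events
    for i in range(3):
        q.put({"price": 100 + i})
        time.sleep(0.1)
    q.put(None)  # sentinel: no more events

async def tick(q: Queue):
    # mirror the polling loop in tick() above: drain the queue, then yield control
    while True:
        while q.qsize() > 0:
            item = q.get()
            if item is None:
                return
            yield item
        await asyncio.sleep(0)

async def main():
    q = Queue()
    Thread(target=producer, args=(q,), daemon=True).start()
    async for event in tick(q):
        print("event:", event)

asyncio.run(main())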
Example #50
0
    def train(self, texts, chunksize=100, workers=2):
        """
        Update the model's neural weights from a sequence of sentences (can be a once-only generator stream).
        Each sentence must be a list of utf8 strings.

        """
        logger.info("training model with %i workers" % (workers))

        start, next_report = time.time(), [1.0]
        jobs = Queue(
            maxsize=2 * workers
        )  # buffer ahead only a limited number of jobs.. this is the reason we can't simply use ThreadPool :(
        lock = threading.Lock(
        )  # for shared state (=number of words trained so far, log reports...)

        total_error = [0.0]
        objects_done = [0]

        def worker_train():
            """Train the model, lifting lists of sentences from the jobs queue."""
            observation_work = np.zeros(self.window * self.size +
                                        self.object_size,
                                        dtype=REAL)
            prediction_work = np.zeros(self.output_size, dtype=REAL)
            composition_work = np.zeros(
                [
                    max(self.output_size, self.window * self.size +
                        self.object_size), self.window * self.size +
                    self.object_size
                ],
                dtype=REAL) if self.bilinear_form else None

            while True:
                job = jobs.get()
                if job is None:  # data finished, exit
                    break
                # how many words did we train on? out-of-vocabulary (unknown) words do not count
                error = sum(
                    train_sentence_concatenation(
                        self, sentence, object_index, softmax_target,
                        sigmoid_target, self._alpha, prediction_work,
                        observation_work, composition_work) for sentence,
                    object_index, softmax_target, sigmoid_target in job)
                with lock:
                    total_error[0] += error
                    objects_done[0] += len(job)
                    elapsed = time.time() - start
                    if elapsed >= next_report[0]:
                        logger.info("PROGRESS: %s objects, %.0f objects/s" %
                                    (objects_done[0], float(objects_done[0]) /
                                     elapsed if elapsed else 0.0))
                        next_report[
                            0] = elapsed + 1.0  # don't flood the log, wait at least a second between progress reports

        dynos = [
            threading.Thread(target=worker_train) for _ in range(0, workers)
        ]
        for thread in dynos:
            thread.daemon = True  # make interrupting the process with ctrl+c easier
            thread.start()

        # convert input strings to Vocab objects (or None for OOV words), and start filling the jobs queue
        no_oov = ((np.array([self.vocab.get_index(word) for word in sentence],
                            dtype=INT), object_index, softmax_target,
                   sigmoid_target) for sentence, object_index, softmax_target,
                  sigmoid_target in texts)
        for job_no, job in enumerate(gensim_utils.grouper(no_oov, chunksize)):
            logger.debug("putting job #%i in the queue, qsize=%i" %
                         (job_no, jobs.qsize()))
            jobs.put(job)
        logger.info(
            "reached the end of input; waiting to finish %i outstanding jobs" %
            jobs.qsize())

        for _ in range(0, workers):
            jobs.put(
                None
            )  # give the workers heads up that they can finish -- no more work!

        for thread in dynos:
            thread.join()

        elapsed = time.time() - start
        logger.info("training on %i objects took %.1fs, %.0f words/s" %
                    (objects_done[0], elapsed,
                     objects_done[0] / elapsed if elapsed else 0.0))

        return (objects_done[0], total_error[0])
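
The docstring above explains why a bounded jobs Queue is used instead of a thread pool: the producer can only buffer a limited number of chunks ahead of the workers, and one None per worker signals the end of input. A minimal sketch of that pattern with a generic process function; all names are illustrative:

import threading
from queue import Queue

def bounded_worker_pool(items, process, workers=2, chunksize=100):
    jobs = Queue(maxsize=2 * workers)    # bounded: the producer cannot run far ahead of the workers
    results, lock = [], threading.Lock()

    def worker():
        while True:
            job = jobs.get()
            if job is None:              # sentinel: no more work
                break
            out = [process(x) for x in job]
            with lock:                   # shared state, as in the training loop above
                results.extend(out)

    threads = [threading.Thread(target=worker, daemon=True) for _ in range(workers)]
    for t in threads:
        t.start()

    chunk = []
    for item in items:                   # feed the queue in chunks
        chunk.append(item)
        if len(chunk) == chunksize:
            jobs.put(chunk)
            chunk = []
    if chunk:
        jobs.put(chunk)
    for _ in range(workers):             # one sentinel per worker
        jobs.put(None)
    for t in threads:
        t.join()
    return results

print(bounded_worker_pool(range(10), lambda x: x * x))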
Example #51
0
class QiuBai:
    def __init__(self):
        self.headers = {
            "User-Agent":
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36"
        }
        self.urlQueue = Queue()
        self.htmlQueue = Queue()
        self.contentQueue = Queue()

    '''
    Fetch the URLs of all pages and return the URL list.
    '''

    def getTotalUrl(self):
        urlTemp = "https://www.qiushibaike.com/hot/page/{}/"
        for i in range(1, 36):
            self.urlQueue.put(urlTemp.format(i))
        print("getTotalUrl is over", self.urlQueue.qsize())

    '''
    Send requests, fetch the responses, and parse the HTML with etree.
    '''

    def parseUrl(self):
        while True:  # Queue.not_empty is a Condition and is always truthy, so this is effectively an infinite loop
            url = self.urlQueue.get()
            print("parsing Url: ", url)
            response = requests.get(url, headers=self.headers, timeout=10)
            # get the raw HTML string
            html = response.content.decode()
            # parse it into an lxml element tree
            html = etree.HTML(html)
            self.htmlQueue.put(html)
            # only mark the url task as done after the response has been received
            print("pu HQ: ", self.htmlQueue.qsize())
            print("pu UQ: ", self.urlQueue.qsize())
            self.urlQueue.task_done()

    '''
    Return a list containing every joke (and all of its fields) on the page of one URL.
    '''

    def getContent(self):
        print("getContent start!", self.htmlQueue.qsize())
        while True:  # Queue.not_empty is always truthy; rely on the blocking get()
            html = self.htmlQueue.get()
            totalDiv = html.xpath("//*[starts-with(@id, 'qiushi_tag_')]")
            items = []
            # iterate over totalDiv and extract every field of each joke
            for i in totalDiv:
                author_img = i.xpath(
                    './div[@class="author clearfix"]/a[1]/img/@src')
                author_img = "https:" + author_img[0] if len(
                    author_img) > 0 else None
                author_name = i.xpath(
                    './div[@class="author clearfix"]/a[2]/h2/text()')
                author_name = author_name[0] if len(author_name) > 0 else None
                author_href = i.xpath(
                    './div[@class="author clearfix"]/a[1]/@href')
                author_href = "https://www.qiushibaike.com" + author_href[
                    0] if len(author_href) > 0 else None
                author_gender = i.xpath(
                    './div[@class="author clearfix"]//div/@class')
                author_gender = author_gender[0].split(" ")[-1].replace(
                    "Icon", "") if len(author_gender) > 0 else None
                author_age = i.xpath(
                    './div[@class="author clearfix"]//div/text()')
                author_age = author_age[0] if len(author_age) > 0 else None
                content = i.xpath('./a[@class="contentHerf"]/div/span/text()')
                content_vote = i.xpath(
                    './div[@class="stats"]/span[1]/i/text()')
                content_vote = content_vote[0] if len(
                    content_vote) > 0 else None
                content_comment_numbers = i.xpath(
                    './div[@class="stats"]/span[2]/a/i/text()')
                content_comment_numbers = content_comment_numbers[0] if len(
                    content_comment_numbers) > 0 else None
                hot_comment_author = i.xpath(
                    './a[@class="indexGodCmt"]/div/span[last()]/text()')
                hot_comment_author = hot_comment_author[0] if len(
                    hot_comment_author) > 0 else None
                hot_comment = i.xpath(
                    './a[@class="indexGodCmt"]/div/div/text()')
                hot_comment = hot_comment[0].replace("\n:", "").replace(
                    "\n", "") if len(hot_comment) > 0 else None
                hot_comment_like_num = i.xpath(
                    './a[@class="indexGodCmt"]/div/div/div/text()')
                hot_comment_like_num = hot_comment_like_num[-1].replace(
                    "\n", "") if len(hot_comment_like_num) > 0 else None
                item = dict(author_name=author_name,
                            author_img=author_img,
                            author_href=author_href,
                            author_gender=author_gender,
                            author_age=author_age,
                            content=content,
                            content_vote=content_vote,
                            content_comment_numbers=content_comment_numbers,
                            hot_comment=hot_comment,
                            hot_comment_author=hot_comment_author,
                            hot_comment_like_num=hot_comment_like_num)
                items.append(item)
            self.contentQueue.put(items)
            print("gt contentQ: ", self.contentQueue.qsize())
            print("gt htmlQ: ", self.htmlQueue.qsize())
            self.htmlQueue.task_done()

    '''
    Save the items.
    '''

    def saveItems(self):
        print("saveItems start cq: ", self.contentQueue.qsize())
        while True:  # Queue.not_empty is always truthy; rely on the blocking get()
            items = self.contentQueue.get()
            f = open('./res/qb.txt', "a", encoding='utf-8')
            for i in items:
                json.dump(i, f, ensure_ascii=False, indent=2)
            f.close()
            self.contentQueue.task_done()

    '''
    Get the url list:
    url_list = self.getTotalUrl
    '''

    def run(self):
        threadList = []

        threadUrl = threading.Thread(target=self.getTotalUrl)
        threadList.append(threadUrl)
        # send the network requests
        for i in range(10):
            threadParse = threading.Thread(target=self.parseUrl)
            threadList.append(threadParse)

        # extract the data
        threadGetContent = threading.Thread(target=self.getContent)
        threadList.append(threadGetContent)
        # save the results
        threadSave = threading.Thread(target=self.saveItems)
        threadList.append(threadSave)
        for t in threadList:
            t.daemon = True  # make every thread a daemon so it exits when the main thread exits
            t.start()

        # block the main thread until all queues are empty
        self.urlQueue.join()
        self.htmlQueue.join()
        self.contentQueue.join()
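
run() above relies on the daemon-thread plus Queue.join() idiom: workers call task_done() once for every get(), and the main thread returns from join() only when every queued item has been processed. A stripped-down sketch of the same shutdown pattern, with illustrative names:

import threading
from queue import Queue

def run_pipeline(items, handle, n_workers=4):
    q = Queue()
    for item in items:
        q.put(item)

    def worker():
        while True:
            item = q.get()       # blocks when empty; the thread is a daemon, so that's fine
            try:
                handle(item)
            finally:
                q.task_done()    # exactly one task_done() per get(), or join() never returns

    for _ in range(n_workers):
        threading.Thread(target=worker, daemon=True).start()

    q.join()  # returns once every queued item has been processed

run_pipeline(range(5), print)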
Example #52
0
class RestClient(object):
    """
    HTTP client, currently designed for talking to various RESTful APIs.

    To sign requests, set beforeRequest; see defaultBeforeRequest for the expected function type.
    To handle non-2xx responses, set onFailed; see defaultOnFailed for the expected function type.
    To handle the non-2xx response of an individual request separately, use the onFailed parameter of addReq.
    To catch Python-level errors such as network failures, set onError; see defaultOnError for the expected function type.
    """

    #----------------------------------------------------------------------
    def __init__(self):
        """
        """
        self.urlBase = None  # type: str
        self._active = False

        self._queue = Queue()
        self._pool = None  # type: Pool
        self._queueing_times = deque(maxlen=100)
        self._response_times = deque(maxlen=100)

    #----------------------------------------------------------------------
    def init(self, urlBase):
        """
        Initialize the client.
        :param urlBase: URL prefix, e.g. 'https://www.bitmex.com/api/v1/'
        """
        self.urlBase = urlBase

    #----------------------------------------------------------------------
    def _createSession(self):
        """"""
        return requests.session()

    #----------------------------------------------------------------------
    def start(self, n=3):
        """启动"""
        if self._active:
            return

        self._active = True
        self._pool = Pool(n)
        self._pool.apply_async(self._run)

    #----------------------------------------------------------------------
    def stop(self):
        """
        Force-stop the client; requests that have not been sent yet remain in the queue.
        :return:
        """
        self._active = False

    #----------------------------------------------------------------------
    def join(self):
        """
        Wait until all requests have been processed.
        To make sure the RestClient actually exits, call join right after calling stop.
        If you only need all requests to finish, calling join alone is enough.
        :return:
        """
        self._queue.join()

    #----------------------------------------------------------------------
    def addRequest(
        self,
        method,  # type: str
        path,  # type: str
        callback,  # type: Callable[[dict, Request], Any]
        params=None,  # type: dict
        data=None,  # type: dict
        headers=None,  # type: dict
        onFailed=None,  # type: Callable[[int, Request], Any]
        onError=None,  # type: Callable[[type, Exception, traceback, Request], Any]
        extra=None  # type: Any
    ):  # type: (...)->Request
        """
        Send a request.
        :param method: GET, POST, PUT, DELETE, QUERY
        :param path:
        :param callback: callback on success (a 2xx status code counts as success)   type: (dict, Request)
        :param params: dict for query string
        :param data: dict for body
        :param headers: dict for headers
        :param onFailed: callback on failure (a non-2xx status code counts as failure); if set, the default onFailed is not called. type: (code, dict, Request)
        :param onError: callback when a Python error occurs during the request; if set, the default onError is not called. type: (etype, evalue, tb, Request)
        :param extra: the returned Request's extra field is set to this value; you can also set it after the call returns.
        :return: Request
        """

        request = Request(method, path, params, data, headers, callback)
        request.extra = extra
        request.onFailed = onFailed
        request.onError = onError
        request.createDatetime = datetime.now()
        request.deliverDatetime = None
        request.responseDatetime = None
        self._queue.put(request)
        return request

    #----------------------------------------------------------------------
    def _run(self):
        session = self._createSession()
        while self._active:
            try:
                request = self._queue.get(timeout=1)
                try:
                    self._processRequest(request, session)
                finally:
                    self._queue.task_done()
            except Empty:
                pass
            except:
                et, ev, tb = sys.exc_info()
                self.onError(et, ev, tb, None)

    #----------------------------------------------------------------------
    def sign(self, request):  # type: (Request)->Request
        """
        Every request passes through this function before it is sent.
        Pre-processing such as signing can be implemented here.
        Modify the request in whatever way is needed.
        @:return (request)
        """
        return request

    #----------------------------------------------------------------------
    def onFailed(self, httpStatusCode, request):  # type:(int, Request)->None
        """
        Handler for failed requests (HttpStatusCode != 2xx).
        The default behavior is to print the request to stderr.
        """
        sys.stderr.write(str(request))

    #----------------------------------------------------------------------
    def onError(
            self,
            exceptionType,  # type: type
            exceptionValue,  # type: Exception
            tb,
            request  # type: Optional[Request]
    ):
        """
        Handler for Python-level errors; the default behavior is to pass them on to excepthook.
        :param request: the Request being processed when the error occurred, or None otherwise
        """
        sys.stderr.write(
            self.exceptionDetail(exceptionType, exceptionValue, tb, request))
        sys.excepthook(exceptionType, exceptionValue, tb)

    #----------------------------------------------------------------------
    def exceptionDetail(
            self,
            exceptionType,  # type: type
            exceptionValue,  # type: Exception
            tb,
            request  # type: Optional[Request]
    ):
        text = "[{}]: Unhandled RestClient Error:{}\n".format(
            datetime.now().isoformat(), exceptionType)
        text += "request:{}\n".format(request)
        text += "Exception trace: \n"
        text += "".join(
            traceback.format_exception(
                exceptionType,
                exceptionValue,
                tb,
            ))
        return text

    #----------------------------------------------------------------------
    def _processRequest(self, request,
                        session):  # type: (Request, requests.Session)->None
        """
        Internal use: actually send the request.
        """
        # noinspection PyBroadException
        try:
            request = self.sign(request)

            url = self.makeFullUrl(request.path)

            request.deliverDatetime = datetime.now()
            self._queueing_times.append(
                (request.deliverDatetime -
                 request.createDatetime).total_seconds())

            response = session.request(request.method,
                                       url,
                                       headers=request.headers,
                                       params=request.params,
                                       data=request.data)
            request.response = response
            request.responseDatetime = datetime.now()

            self._response_times.append(
                (request.responseDatetime -
                 request.deliverDatetime).total_seconds())

            httpStatusCode = response.status_code
            if httpStatusCode // 100 == 2:  # any 2xx counts as success, even though exchanges usually return 200
                jsonBody = response.json()
                request.callback(jsonBody, request)
                request.status = RequestStatus.success
            else:
                request.status = RequestStatus.failed

                if request.onFailed:
                    jsonBody = response.json()
                    request.onFailed(jsonBody, request)
                self.onFailed(httpStatusCode, request)
        except:
            request.status = RequestStatus.error
            t, v, tb = sys.exc_info()
            if request.onError:
                request.onError(t, v, tb, request)
            else:
                self.onError(t, v, tb, request)

    #----------------------------------------------------------------------
    def makeFullUrl(self, path):
        """
        Turn a relative path into a full URL:
        eg: makeFullUrl('/get') == 'http://xxxxx/get'
        :param path:
        :return:
        """
        url = self.urlBase + path
        return url

    def getStatus(self):
        """
        Return some basic runtime information about the client at this moment.
        """
        return {
            "queueing_number":
            self._queue.qsize(),
            "avg_queueing_time":
            sum(self._queueing_times) /
            len(self._queueing_times) if len(self._queueing_times) else 0,
            "avg_response_time":
            sum(self._response_times) /
            len(self._response_times) if len(self._response_times) else 0
        }
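
getStatus() above reports rolling averages of queueing and response times kept in deque(maxlen=100) buffers, so only the most recent 100 samples contribute. A small sketch of that bookkeeping in isolation; the class name and sample values are illustrative:

from collections import deque

class LatencyStats:
    def __init__(self, window=100):
        self.samples = deque(maxlen=window)   # only the most recent `window` samples are kept

    def record(self, seconds):
        self.samples.append(seconds)

    @property
    def average(self):
        return sum(self.samples) / len(self.samples) if self.samples else 0

stats = LatencyStats()
for s in (0.12, 0.30, 0.25):
    stats.record(s)
print(round(stats.average, 3))  # 0.223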
Example #53
0
def extract_torrents(provider, client):
    """ Main torrent extraction generator for non-API based providers

    Args:
        provider  (str): Provider ID
        client (Client): Client class instance

    Yields:
        tuple: A torrent result
    """
    definition = definitions[provider]
    definition = get_alias(definition, get_setting("%s_alias" % provider))
    log.debug("[%s] Extracting torrents from %s using definitions: %s" % (provider, provider, repr(definition)))

    if not client.content:
        if get_setting("use_debug_parser", bool):
            log.debug("[%s] Parser debug | Page content is empty" % provider)

        return  # PEP 479: raising StopIteration inside a generator is an error in Python 3.7+

    dom = Html().feed(client.content)

    key_search = get_search_query(definition, "key")
    row_search = get_search_query(definition, "row")
    name_search = get_search_query(definition, "name")
    torrent_search = get_search_query(definition, "torrent")
    info_hash_search = get_search_query(definition, "infohash")
    size_search = get_search_query(definition, "size")
    seeds_search = get_search_query(definition, "seeds")
    peers_search = get_search_query(definition, "peers")
    referer_search = get_search_query(definition, "referer")

    log.debug("[%s] Parser: %s" % (provider, repr(definition['parser'])))

    q = Queue()
    threads = []
    needs_subpage = 'subpage' in definition and definition['subpage']

    if needs_subpage:
        def extract_subpage(q, name, torrent, size, seeds, peers, info_hash, referer):
            try:
                log.debug("[%s] Getting subpage at %s" % (provider, repr(torrent)))
            except Exception as e:
                import traceback
                log.error("[%s] Subpage logging failed with: %s" % (provider, repr(e)))
                map(log.debug, traceback.format_exc().split("\n"))

            # New client instance, otherwise it's race conditions all over the place
            subclient = Client()
            subclient.passkey = client.passkey
            headers = {}

            if "subpage_mode" in definition:
                if definition["subpage_mode"] == "xhr":
                    headers['X-Requested-With'] = 'XMLHttpRequest'
                    headers['Content-Language'] = ''

            if referer:
                headers['Referer'] = referer

            uri = torrent.split('|')  # Split cookies for private trackers
            subclient.open(py2_encode(uri[0]), headers=headers)

            if 'bittorrent' in subclient.headers.get('content-type', ''):
                log.debug('[%s] bittorrent content-type for %s' % (provider, repr(torrent)))
                if len(uri) > 1:  # Stick back cookies if needed
                    torrent = '%s|%s' % (torrent, uri[1])
            else:
                try:
                    torrent = extract_from_page(provider, subclient.content)
                    if torrent and not torrent.startswith('magnet') and len(uri) > 1:  # Stick back cookies if needed
                        torrent = '%s|%s' % (torrent, uri[1])
                except Exception as e:
                    import traceback
                    log.error("[%s] Subpage extraction for %s failed with: %s" % (provider, repr(uri[0]), repr(e)))
                    map(log.debug, traceback.format_exc().split("\n"))

            log.debug("[%s] Subpage torrent for %s: %s" % (provider, repr(uri[0]), torrent))
            ret = (name, info_hash, torrent, size, seeds, peers)
            q.put_nowait(ret)

    if not dom:
        if get_setting("use_debug_parser", bool):
            log.debug("[%s] Parser debug | Could not parse DOM from page content" % provider)

        return  # PEP 479: raising StopIteration inside a generator is an error in Python 3.7+

    if get_setting("use_debug_parser", bool):
        log.debug("[%s] Parser debug | Page content: %s" % (provider, client.content.replace('\r', '').replace('\n', '')))

    key = eval(key_search) if key_search else ""
    if key_search and get_setting("use_debug_parser", bool):
        key_str = key.__str__()
        log.debug("[%s] Parser debug | Matched '%s' iteration for query '%s': %s" % (provider, 'key', key_search, key_str.replace('\r', '').replace('\n', '')))

    items = eval(row_search)
    if get_setting("use_debug_parser", bool):
        log.debug("[%s] Parser debug | Matched %d items for '%s' query '%s'" % (provider, len(items), 'row', row_search))

    for item in items:
        if get_setting("use_debug_parser", bool):
            item_str = item.__str__()
            log.debug("[%s] Parser debug | Matched '%s' iteration for query '%s': %s" % (provider, 'row', row_search, item_str.replace('\r', '').replace('\n', '')))

        if not item:
            continue

        try:
            name = eval(name_search) if name_search else ""
            torrent = eval(torrent_search) if torrent_search else ""
            size = eval(size_search) if size_search else ""
            seeds = eval(seeds_search) if seeds_search else ""
            peers = eval(peers_search) if peers_search else ""
            info_hash = eval(info_hash_search) if info_hash_search else ""
            referer = eval(referer_search) if referer_search else ""

            if 'magnet:?' in torrent:
                torrent = torrent[torrent.find('magnet:?'):]

            if get_setting("use_debug_parser", bool):
                log.debug("[%s] Parser debug | Matched '%s' iteration for query '%s': %s" % (provider, 'name', name_search, name))
                log.debug("[%s] Parser debug | Matched '%s' iteration for query '%s': %s" % (provider, 'torrent', torrent_search, torrent))
                log.debug("[%s] Parser debug | Matched '%s' iteration for query '%s': %s" % (provider, 'size', size_search, size))
                log.debug("[%s] Parser debug | Matched '%s' iteration for query '%s': %s" % (provider, 'seeds', seeds_search, seeds))
                log.debug("[%s] Parser debug | Matched '%s' iteration for query '%s': %s" % (provider, 'peers', peers_search, peers))
                if info_hash_search:
                    log.debug("[%s] Parser debug | Matched '%s' iteration for query '%s': %s" % (provider, 'info_hash', info_hash_search, info_hash))
                if referer_search:
                    log.debug("[%s] Parser debug | Matched '%s' iteration for query '%s': %s" % (provider, 'info_hash', referer_search, referer))

            # Pass client cookies with torrent if private
            if not torrent.startswith('magnet'):
                user_agent = USER_AGENT

                if client.passkey:
                    torrent = torrent.replace('PASSKEY', client.passkey)
                elif client.token:
                    headers = {'Authorization': client.token, 'User-Agent': user_agent}
                    log.debug("[%s] Appending headers: %s" % (provider, repr(headers)))
                    torrent = append_headers(torrent, headers)
                    log.debug("[%s] Torrent with headers: %s" % (provider, repr(torrent)))
                else:
                    parsed_url = urlparse(torrent.split('|')[0])
                    cookie_domain = '{uri.netloc}'.format(uri=parsed_url)
                    cookie_domain = re.sub(r'www\d*\.', '', cookie_domain)
                    cookies = []
                    for cookie in client._cookies:
                        if cookie_domain in cookie.domain:
                            cookies.append(cookie)
                    headers = {}
                    if cookies:
                        headers = {'User-Agent': user_agent}
                        log.debug("[%s] Cookies res: %s / %s" % (provider, repr(headers), repr(client.request_headers)))
                        if client.request_headers:
                            headers.update(client.request_headers)
                        if client.url:
                            headers['Referer'] = client.url
                            headers['Origin'] = client.url
                        # Need to set Cookie afterwards to avoid rewriting it with session Cookies
                        headers['Cookie'] = ";".join(["%s=%s" % (c.name, c.value) for c in cookies])
                    else:
                        headers = {'User-Agent': user_agent}

                    torrent = append_headers(torrent, headers)

            if name and torrent and needs_subpage and not torrent.startswith('magnet'):
                if not torrent.startswith('http'):
                    torrent = definition['root_url'] + py2_encode(torrent)
                t = Thread(target=extract_subpage, args=(q, name, torrent, size, seeds, peers, info_hash, referer))
                threads.append(t)
            else:
                yield (name, info_hash, torrent, size, seeds, peers)
        except Exception as e:
            log.error("[%s] Got an exception while parsing results: %s" % (provider, repr(e)))

    if needs_subpage:
        log.debug("[%s] Starting subpage threads..." % provider)
        for t in threads:
            t.start()
        for t in threads:
            t.join()

        for i in range(q.qsize()):
            ret = q.get_nowait()
            log.debug("[%s] Queue %d got: %s" % (provider, i, repr(ret)))
            yield ret
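
When subpages are needed, the extractor above starts one thread per result, lets each thread put_nowait() its tuple into a Queue, joins all threads and then drains the queue with get_nowait(). A minimal sketch of that fan-out/collect pattern; fetch and the inputs are placeholders:

from queue import Queue
from threading import Thread

def fan_out(urls, fetch):
    q, threads = Queue(), []
    for url in urls:
        # one thread per item; each pushes its result into the shared queue
        t = Thread(target=lambda u=url: q.put_nowait(fetch(u)))
        threads.append(t)
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    # every thread has finished, so the queue can be drained without blocking
    return [q.get_nowait() for _ in range(q.qsize())]

print(fan_out(["a", "b", "c"], str.upper))  # order is not guaranteed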
Example #54
0
class ScrapeWorker(Process):
    def __init__(self, model, dummy=False):
        super(ScrapeWorker, self).__init__()

        self.source_q = Queue()
        self.parse_q = Queue()
        self.seen = ScalableBloomFilter()
        self.forwarded = ScalableBloomFilter()
        self.new_sources = []
        self.workers = []
        self.to_forward = []
        self.parser = None
        self.done_parsing = False
        self.no_more_sources = False
        self.dbs = dict()
        self.schedule = model.schedule
        self.model = model
        self.source_kill = None
        self.dummy = dummy

        db_threads = defaultdict(list)

        # Check if the functions in each template are used properly
        # and store which types of databases are needed.
        for phase in self.model.phases:
            for template in phase.templates:
                self.check_functions(template, phase)
                if template.db_type:
                    db_threads[template.db_type].append(template)

        # Start all the threads necessary for storing the data and give each
        # template a reference to the thread it needs to store data in.
        for thread, templates in db_threads.items():
            if not dummy:
                store_thread = databases._threads[thread]()
            else:
                store_thread = databases._threads['dummy']()

            for template in templates:
                self.dbs[template.name] = store_thread
            store_thread.start()

    def run(self):
        # create the threads needed to scrape
        i = 0
        while i < len(self.model.phases):
            # if self.is_scheduled():
            phase = self.model.phases[i]
            print('running phase:', i, phase.name)

            # Check if the phase has a parser, if not, reuse the one from the
            # last phase.
            self.to_parse = 0
            self.parsed = 0

            if phase.active:
                self.spawn_workforce(phase)
                self.add_sources(phase)
                self.to_forward = []
                self.parse_sources()

            if not phase.repeat:
                i += 1
        for db in self.dbs.values():
            db.store_q.put(None)
        for db in self.dbs.values():
            db.store_q.join()
        print('Waiting for the database')
        print('Scraper fully stopped')

    def parse_sources(self):
        while True:
            if self.to_parse == self.parsed:
                break
            try:
                source = self.parse_q.get(timeout=10)
            except Empty:
                if self.source_q.empty():
                    print('No more sources to parse at this point')
                    break
                # elif self.paused:
                #     time.sleep(self.get_sleep_time())
                else:
                    print('Waiting for sources to parse')
                    source = None

            if source is not None:
                self.seen.add(source.url)
                objects = self.parser.parse(source)
                self.parsed += 1

                for obj in objects:
                    if obj.db:
                        self.dbs[obj.name].store_q.put(obj)

                for new_source in self.new_sources:
                    self._gen_source(*new_source)

                self.new_sources = []
                self.show_progress()

        print('Unparsed ', self.source_q.qsize())

    def spawn_workforce(self, phase):
        # check if phase reuses the current source workforce
        if phase.parser:
            self.parser = phase.parser(parent=self, templates=phase.templates)
        elif not self.parser and not phase.parser:
            raise Exception('No parser was specified')
        else:
            parse_class = self.parser.__class__
            self.parser = parse_class(parent=self, templates=phase.templates)

        if phase.n_workers:
            n_workers = phase.n_workers
        else:
            n_workers = self.model.num_getters

        # Kill existing workers if there are any
        if self.workers:
            self.source_kill.set()

        # Create new Event to be able to kill the source workers
        self.source_kill = Event()
        self.workers = [
            phase.source_worker(parent=self, id=i, stop_event=self.source_kill)
            for i in range(n_workers)
        ]
        for worker in self.workers:
            worker.start()

    def add_sources(self, phase):
        urls_in_db = []
        if phase.synchronize:
            urls_in_db = [url for url in self.get_scraped_urls(phase)]

        for source in self.to_forward:
            if source.url not in urls_in_db:
                self.source_q.put(source)
                self.to_parse += 1

        for source in phase.sources:
            if source.from_db:
                sources = self.dbs[source.from_db].read(source.from_db)
            if source.active:
                self.source_q.put(source)
                self.to_parse += 1

    def get_scraped_urls(self, phase):
        for template in phase.templates:
            if template.name in self.dbs:
                for objct in self.dbs[template.name].read(template):
                    if objct:
                        yield objct.attrs['url'].value

    def _gen_source(self, objct, attr):
        for value in attr.value:
            # for now only "or" is supported.
            if not self._evaluate_condition(objct, attr):
                continue

            url = self._apply_src_template(attr.source, value)
            attrs = []

            if attr.source.copy_attrs:
                attrs_to_copy = attr.source.copy_attrs
                assert all(attr in objct.attrs for attr in attrs_to_copy)
                if type(attrs_to_copy) == dict:
                    # We store the copied attributes under different names.
                    for key, value in attrs_to_copy.items():
                        attrs.append(objct.attrs[key](name=value))
                else:
                    for key in attrs_to_copy:
                        attrs.append(objct.attrs[key]())

            new_source = attr.source(url=url, attrs=attrs)

            if attr.attr_condition:
                if self.value_is_new(objct, value, attr.attr_condition):
                    self._add_source(new_source)
            else:
                self._add_source(new_source)

    def _add_source(self, source):
        if source.url and (source.url not in self.seen or source.duplicate) \
                and source.url not in self.forwarded:
            if source.active:
                self.to_parse += 1
                self.source_q.put(source)
                self.seen.add(source.url)
            else:
                self.to_forward.append(source)
                self.forwarded.add(source.url)

    def value_is_new(self, objct, uri, name):
        db_objct = self.db.read(uri, objct)
        if db_objct and db_objct.attrs.get(name):
            if db_objct.attrs[name].value != objct.attrs[name].value:
                return True
            return False

    def _apply_src_template(self, source, url):
        if source.src_template:
            # use formatting notation in the src_template
            return source.src_template.format(url)
        return url

    def _evaluate_condition(self, objct, attr, **kwargs):
        # TODO add "in", and other possibilities.
        if attr.source_condition:
            for name, cond in attr.source_condition.items():
                values = objct.attrs[name].value
                # Wrap the value in a list without for example seperating the
                # characters.
                values = [values] if type(values) != list else values
                for val in values:
                    if val and not eval(str(val) + cond, {}, {}):
                        return False
        return True

    def reset_source_queue(self):
        while not self.source_q.empty():
            try:
                self.source_q.get(False)
            except Empty:
                continue
            self.source_q.task_done()

    def show_progress(self):
        if not self.dummy:
            os.system('clear')
        info = '''
        Domain              {}
        Sources to get:     {}
        Sources to parse:   {}
        Sources parsed:     {}
        Average get time:   {}s
        Average parse time: {}s
        '''
        get_average = sum(w.mean for w in self.workers) / len(self.workers)
        print(
            info.format(self.name, self.source_q.qsize(), self.to_parse,
                        self.parsed, round(get_average, 3),
                        round(self.parser.total_time / self.parsed, 3)))

    def check_functions(self, template, phase):
        error_string = "One of these functions: {} is not implemented in {}."
        not_implemented = []

        for attr in template.attrs.values():
            for func in attr.func:
                if not getattr(phase.parser, func, False):
                    not_implemented.append(func)

        if not_implemented:
            raise Exception(
                error_string.format(str(not_implemented),
                                    phase.parser.__class__.__name__))
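
spawn_workforce() above retires the previous batch of source workers by setting a shared Event that each worker is expected to poll. A small sketch of that stop-event pattern, assuming workers check the event between units of work (all names are illustrative):

import time
from threading import Event, Thread

def spawn_workers(n, stop_event, work):
    def loop():
        while not stop_event.is_set():   # exit as soon as the kill switch is set
            work()
            time.sleep(0.05)
    workers = [Thread(target=loop, daemon=True) for _ in range(n)]
    for w in workers:
        w.start()
    return workers

stop = Event()
workers = spawn_workers(2, stop, lambda: None)
time.sleep(0.2)
stop.set()                # ask every worker to finish
for w in workers:
    w.join()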
Example #55
0
def index(request):
    '''
    res: [
        {"rentdate": yyyy-MM-dd, "venueName": xxxx, "rentTimePeriod": HH:mm:ss~HH:mm:ss ...},
        ...
    ]
    cal: [
        {
            "date": 1, # day 1
            "courts": [
                {"rentdate": d, "venueName": xxxx, "rentTimePeriod": HH:mm:ss~HH:mm:ss ...},
                ...
            ]
        },
        ...
    ]
    '''
    if 'month' in request.GET and request.GET['month']:
        requestMonth = int(request.GET['month'])
    else:
        requestMonth = datetime.now().month

    if 'year' in request.GET and request.GET['year']:
        requestYear = int(request.GET['year'])
    else:
        requestYear = datetime.now().year

    currentYear = datetime.now().year  ## for copyright year
    requestTime = datetime(requestYear, requestMonth, 1)
    requestDateS = datetime(requestTime.year, requestTime.month,
                            1).strftime("%Y-%m-%d")  ## yyyy-MM-dd
    requestDateE = datetime(
        requestTime.year, requestTime.month,
        calendar.monthrange(requestTime.year, requestTime.month)[1]).strftime(
            "%Y-%m-%d")  ## yyyy-MM-dd
    monthList = [i for i in range(1, 13)]
    monthselect = [""] * 12
    monthselect[requestMonth - 1] = "selected"
    res = []
    isDrawn = True
    q = Queue()
    threads = []

    for court in requestvenueId:
        key = {
            'rentDateS': requestDateS,
            'rentDateE': requestDateE,
            'venueId': court
        }
        t = threading.Thread(target=threadIndex, args=(q, key))
        t.start()
        threads.append(t)

    for thread in threads:
        thread.join()

    for _ in range(q.qsize()):
        data = q.get()
        res += data['res']
        isDrawn &= data['isDrawn']

    res.sort(key=lambda s: s['rentDate'])

    ## calendar
    weekdayS = calendar.monthrange(requestTime.year, requestTime.month)[0]
    days = calendar.monthrange(requestTime.year, requestTime.month)[1]
    weeks = math.ceil((weekdayS + days) / 7)

    cal = [[{"date": 0, "courts": []} for _ in range(7)] for _ in range(weeks)]
    for i in range(1, days + 1):
        cal[(i + weekdayS - 1) // 7][(i + weekdayS - 1) % 7] = {
            "date": i,
            "courts": []
        }

    ## add court to calendar
    for i in res:
        date = int(i['rentDate'][-2:])
        cal[(date + weekdayS - 1) // 7][(date + weekdayS - 1) %
                                        7]['courts'].append(i)

    return render(request, 'home/index.html', locals())
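
The view above lays the results out as a month grid by mapping day i to cell ((i + weekdayS - 1) // 7, (i + weekdayS - 1) % 7). A short, self-contained sketch of that calendar arithmetic, with illustrative names:

import calendar

def month_grid(year, month):
    first_weekday, days = calendar.monthrange(year, month)   # (weekday of day 1, days in month)
    weeks = -(-(first_weekday + days) // 7)                   # ceiling division
    grid = [[0] * 7 for _ in range(weeks)]
    for day in range(1, days + 1):
        row, col = divmod(day + first_weekday - 1, 7)
        grid[row][col] = day
    return grid

for week in month_grid(2021, 9):
    print(week)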
Example #56
0
class DouBanMovieSpider(object):

    def __init__(self, config, start_id):
        # a dict of cookies
        self.cookies = None
        self.thread_count = int(config['spider']['thread_count'])
        self.network_max_try_times = int(config['network']['max_try_times'])

        self.movie_id_in_queue = set()
        self.queue = Queue()
        self.queue.put(start_id)
        self.movie_id_in_queue.add(start_id)

        self.logger = Loggers.get_logger(config)
        self.db_helper = DbHelper.DbHelper()

        self.login_if_necessary(config)
        

        self.proxy = proxy.AbuyunProxy(config)
        self.store_lock = threading.Lock()
        self.db_lock = threading.Lock()

        self.thread_list = []

    def movie_exist_in_db(self, id):
        return self.db_helper.is_movie_id_exists(id)

    def start(self):
        for i_thread in range(self.thread_count):
            t = threading.Thread(target=self.scratch_movie_info, name='spider thread %d' % i_thread)
            self.thread_list.append(t)
            t.start()

    def join(self):
        for t in self.thread_list:
            t.join()

    def scratch_movie_info(self):
        while True:
            self.logger.debug("Current queue length : " + str(self.queue.qsize()))
            id_scratch = None
            try:
                id_scratch = self.queue.get(timeout=20)
            except:
                self.logger.warning('queue empty, exiting thread: %s' % threading.current_thread().name)
                break

            self.logger.debug("Scratch from id : %s in thread : %s" % (id_scratch, threading.current_thread().name))
            movie = self.get_movie_by_id(id_scratch)

            if not movie:
                self.logger.debug('did not get info from this movie(id=%s)' % id_scratch)
                self.queue.put(id_scratch)

                if not self.proxy.enable:
                    Utils.Utils.delay(constants.DELAY_MIN_SECOND, constants.DELAY_MAX_SECOND)
                continue

            next_movie_ids = movie.get('next_movie_ids', [])

            if len(next_movie_ids) > 0 :
                if self.store_lock.acquire():
                    for mid in next_movie_ids:
                        if mid in self.movie_id_in_queue:
                            continue
                        self.logger.debug('add %s to queue(len=%d)' % (mid,len(self.movie_id_in_queue)))
                        self.movie_id_in_queue.add(mid)
                        self.queue.put(mid)
                    self.store_lock.release()

            if self.db_lock.acquire():
                self.db_helper.insert_movie(movie)
                self.db_lock.release()

            # if the proxy is enabled, we don't wait because the proxy changes IP very frequently
            if not self.proxy.enable:
                Utils.Utils.delay(constants.DELAY_MIN_SECOND, constants.DELAY_MAX_SECOND)

    def get_movie_by_id(self, id):
        # change headers every time; this seems unnecessary when we use a proxy
        headers = {'User-Agent': random.choice(constants.USER_AGENT)}

        # fetch the Douban page (API) data
        r = None
        try_times = 0
        while not r:
            try:
                try_times += 1
                r = requests.get(
                    constants.URL_PREFIX + str(id),
                    headers=headers,
                    cookies=self.cookies,
                    proxies=self.proxy.get()
                )

                if not r:
                    if try_times <= self.network_max_try_times:
                        # wait seconds if we can't get any response
                        Utils.Utils.delay(1, 5)
                    else:
                        self.logger.error('Cannot get movie(id=%s) info' % id)
                        return None
            except IOError as e:
                self.logger.warning('request exception : %s' % str(e))
                if try_times <= self.network_max_try_times:
                    # wait a few seconds if a network error occurs; there must be some trouble with the network
                    Utils.Utils.delay(1, 5)
                else:
                    self.logger.error('Cannot get movie(id=%s) info' % id)
                    return None

        r.encoding = 'utf-8'

        # extract the movie data
        movie_parser = MovieParser.MovieParser()
        movie_parser.set_html_doc(r.text)
        movie = movie_parser.extract_movie_info()
        if movie:
            movie['douban_id'] = id
        return movie

    def login_if_necessary(self, config):
        login_enable = int(config['login_douban']['enable']) == 1
        if not login_enable:
            self.logger.debug('douban login enable : %s' % str(login_enable))
            return

        cookie_helper = CookiesHelper.CookiesHelper(
            config['douban']['user'],
            config['douban']['password']
        )
        # simulate a user login
        self.cookies = cookie_helper.get_cookies()
        self.logger.debug('cookies: %s' % str(self.cookies))
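
The spider above drives its crawl with a Queue of movie ids and a movie_id_in_queue set that keeps the same id from being enqueued twice. A compact sketch of that queue-plus-seen-set traversal with a toy neighbors function (names and the toy graph are illustrative):

from queue import Queue

def bfs_crawl(start, neighbors, limit=10):
    q, seen, visited = Queue(), {start}, []
    q.put(start)
    while not q.empty() and len(visited) < limit:
        node = q.get()
        visited.append(node)
        for nxt in neighbors(node):
            if nxt not in seen:      # never enqueue the same id twice
                seen.add(nxt)
                q.put(nxt)
    return visited

print(bfs_crawl(1, lambda n: [n * 2, n * 2 + 1], limit=7))  # [1, 2, 3, 4, 5, 6, 7]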
Example #57
0
def run():
	with pg.connect('dbname=ng_am user=ng_am password=ng_am') as conn, open('out', 'w') as outf:
		cur = conn.cursor()
		hitQ = Queue()
		failQ = Queue()
		try:
			with open('qs.json', 'r') as qf:
				[hitL, failL] = json.load(qf)
				for hit in hitL:
					hitQ.put((V(*[tuple(h) for h in hit[0] if h[0] < 30]), V(*hit[1])))
				for fail in failL:
					failQ.put(fail)
		except FileNotFoundError:
			pass
			
		if hitQ.qsize() == 0:
			hitQ.put((V(), V())) # for each prefix I need to get the total score of all the remaining possibiliies and the scores of each of those as well, then break those into grouped letter placements. These will likely be very big so I would want to put these into the database if I can
		# it's important to track the number of failures to get to that point too, so the state seems to be:
		# set of failed guesses
		# pos-letter combinations
		#
		# from a given guess, we split into all of the new positions and task them to count for each sub. But the distribution is just meant for making decisions on the next letter to choose: the actual probability of being wrong is given from the score distribution of the terms that don't have the next chosen letter. Note we have to recalculate the distributions updating the arrays separately, the hit array and the fail array: these are the two dimensions. And each node yields two outcomes, one for success and one for failure to the next most likely letter
		# That makes sense. To queue for BFS I may have to just be prudent in how I push into the queue, just to make sure I go along the failures before I go along the axis of successes. Also note that if I do want to just make conditional probabilities in the DB then I'll have to include the failure probabilities relative to the parent. And I guess the number of failures as well. Or I can just index them properly. Each stores the index relative to the parent letters, as well as the configuration? This is also unique, which makes it hard to store into a DB other than as a JSON object since it's a list of tuples. The BFS will be dominated by the last layer, I may just hold this in memory until I find a good way to represent it on disk, maybe even just as a flat JSON.
		# I need to know the probability of the match failing which happens if we choose something that doesn't contain the most likely letter, which is distinct from the score of the words with the most likely letter: I need to get the sum of the scores of all the words so I can do the universal difference/ratio
		while not (hitQ.empty() and failQ.empty()):
			try:
				q = failQ.get(timeout=0)
			except Empty:
				q = hitQ.get()
			
			hits, fails = q
			print(q)
			flat_hits = [s for hit in hits for s in hit]
			sys.stdout.write('Scoring\r')
			cur.execute('''
				SELECT COUNT(*), SUM(score) FROM words w
				LEFT JOIN letters l ON l.letter = ANY(%s) AND l.word = w.id
				WHERE l.word IS NULL
				''' + "".join([' AND w.l%s=%s'] * len(hits)),
				tuple([list(fails)] + flat_hits)
			)
			scorer = cur.fetchone()
			num, tot = scorer
			if num > 0:
				sys.stdout.write('Searching\r')
				cur.execute('''
					SELECT st1.letter, COUNT(*), SUM(st1.score) AS s FROM (
					  SELECT l3.letter, w.score, w.id 
					    FROM words w
					    LEFT JOIN letters l2 ON w.id = l2.word AND l2.letter = ANY(%s)
					    INNER JOIN letters l3 ON l3.word = w.id
							WHERE l2.word IS NULL AND l3.letter <> ALL(%s)
					'''\
					+ "".join([' AND w.l%s=%s'] * len(hits))\
					+ ''') st1
					GROUP BY st1.letter
					ORDER BY s DESC LIMIT 1''', # WHERE NOT (st1.letter ~ '[^A-Za-z]')
					tuple([list(fails), list(set(hit[1] for hit in hits))] + flat_hits)
				) # AND w.length = %s
				nextr = cur.fetchone()
				if nextr != None:
					(next_letter, next_count, score) = nextr
					sys.stdout.write('Assembling\r')
					cur.execute('''
						SELECT COUNT(*), l1.pos FROM words w
						LEFT JOIN letter_agg l ON l.letter = ANY(%s) AND l.word = w.id
						INNER JOIN letter_agg l1 ON l1.word = w.id
						WHERE l.letter IS NULL AND l1.letter = %s
						'''\
						+ "".join([' AND w.l%s=%s'] * len(hits))
						+ ' GROUP BY l1.pos',
						tuple([list(fails)] + [next_letter] + flat_hits)
					)
					nexts = cur.fetchall()
					json.dump([flat_hits, list(fails), scorer, nextr, nexts], outf, cls=DecimalEncoder)
					outf.write('\n')
					outf.flush()
					
					for n in nexts:
						hitQ.put((hits + [(n_, next_letter) for n_ in n[1] if n_ < 30], fails))
					failQ.put((hits, fails.append(next_letter)))
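
The loop above always drains failQ with a zero-timeout get before falling back to hitQ, so failure states are explored before success states. A tiny sketch of that two-queue priority pattern, with illustrative names:

from queue import Empty, Queue

def next_item(fail_q, hit_q):
    try:
        return fail_q.get(timeout=0)   # prefer pending failures
    except Empty:
        return hit_q.get()             # fall back to the hit queue

fails, hits = Queue(), Queue()
hits.put("hit-1")
fails.put("fail-1")
print(next_item(fails, hits))  # fail-1
print(next_item(fails, hits))  # hit-1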
Example #58
0
class PooledConnection(object):
    """连接池"""

    def __init__(self, connection_strings, max_count=10, min_free_count=1, monitor_log=False):
        self._max_count = max_count
        self._min_free_count = min_free_count
        self._connection_strings = connection_strings
        self._count = 0
        self._queue = Queue(max_count)
        self._lock = threading.Lock()

        if monitor_log:
            self._run_monitor()

    def __del__(self):
        while not self._queue.empty():

            conn = self._queue.get()

            if conn:
                self._close_connection(conn)
            else:
                break

    def _run_monitor(self):
        def process(p):
            log.info('pool connection state:pid:%s, max_count:%s,min_free_count:%s,count:%s,free_count:%s' %
                     (os.getpid(), p._max_count, p._min_free_count, p._count, p._queue.qsize()))

        t = threading.Timer(5.0, process, args=(self,))
        t.start()
        t.join()

    def _create_connection(self, autoCommit=True):
        if self._count >= self._max_count:
            raise PoolError('Exceeded the maximum number of connections!')
        conn = Connection(self, host=self._connection_strings.get('host'),
                          port=self._connection_strings.get('port'),
                          user=self._connection_strings.get('user'),
                          password=self._connection_strings.get('password'),
                          db=self._connection_strings.get('database'),
                          charset='utf8',
                          autocommit=autoCommit,
                          cursorclass=pymysql.cursors.DictCursor)
        self._count += 1
        return conn

    def release_connection(self, connection):
        """释放连接"""
        self._lock.acquire()
        if self._queue.qsize() >= self._min_free_count:
            self._close_connection(connection)
        else:
            self._queue.put(connection)
        self._lock.release()

    def get_connection(self, timeout=15):
        """获取一个连接"""
        bt = datetime.datetime.now()

        def get_conn():
            self._lock.acquire()
            try:
                if not self._queue.empty():
                    conn = self._queue.get()
                elif self._count < self._max_count:
                    conn = self._create_connection()
                else:
                    conn = None
                return conn
            except:
                raise
            finally:
                self._lock.release()

        conn = get_conn()
        if conn:
            return conn
        else:
            if timeout:
                while (datetime.datetime.now() - bt).total_seconds() < timeout:
                    conn = get_conn()
                    if conn:
                        break
                    time.sleep(0.2)
            if not conn:
                raise PoolError('Timeout! No free connection is available.')
            return conn

    def _close_connection(self, connection):
        """Close a connection and decrement the pool count."""
        try:
            if connection._close():
                self._count -= 1
        except Exception:
            # Ignore errors while closing; the connection is being discarded anyway.
            pass
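A minimal usage sketch for the pool above: the dict keys mirror what _create_connection reads, while the host/credential values and the pymysql-style cursor() call on the wrapped Connection are assumptions for illustration.

pool = PooledConnection(
    {'host': '127.0.0.1', 'port': 3306, 'user': 'app',
     'password': 'secret', 'database': 'appdb'},
    max_count=5, min_free_count=1)

conn = pool.get_connection(timeout=5)
try:
    with conn.cursor() as cur:  # assumes the wrapped Connection exposes pymysql-style cursors
        cur.execute('SELECT 1 AS ok')
        print(cur.fetchone())
finally:
    pool.release_connection(conn)  # re-queued or closed, depending on _min_free_count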
Example #59
def execute_jobs(jobs,
                 show_progress=False,
                 number_of_workers=10,
                 debug_jobs=False):
    global windows_force_posix
    from vcstool.streams import stdout
    if debug_jobs:
        logger.setLevel(logging.DEBUG)

    if windows_force_posix:
        logger.debug('force POSIX paths on Windows')

    results = []

    job_queue = Queue()
    result_queue = Queue()

    # create worker threads
    workers = []
    for _ in range(min(number_of_workers, len(jobs))):
        worker = Worker(job_queue, result_queue)
        workers.append(worker)

    # fill job_queue with jobs for each worker
    pending_jobs = list(jobs)
    running_job_paths = []
    while job_queue.qsize() < len(workers):
        job = get_ready_job(pending_jobs)
        if not job:
            break
        running_job_paths.append(job['client'].path)
        logger.debug("started '%s'" % job['client'].path)
        job_queue.put(job)
    logger.debug('ongoing %s' % running_job_paths)

    # start all workers
    [w.start() for w in workers]

    # collect results
    while len(results) < len(jobs):
        (job, result) = result_queue.get()
        logger.debug("finished '%s'" % job['client'].path)
        running_job_paths.remove(result['job']['client'].path)
        if show_progress and len(jobs) > 1:
            if result['returncode'] == NotImplemented:
                stdout.write('s')
            elif result['returncode']:
                stdout.write('E')
            else:
                stdout.write('.')
            if debug_jobs:
                stdout.write('\n')
            stdout.flush()
        result.update(job)
        results.append(result)
        if pending_jobs:
            for pending_job in pending_jobs:
                pending_job.get('depends', set()).discard(job['client'].path)
            while job_queue.qsize() < len(workers):
                job = get_ready_job(pending_jobs)
                if not job:
                    break
                running_job_paths.append(job['client'].path)
                logger.debug("started '%s'" % job['client'].path)
                job_queue.put(job)
            assert running_job_paths
        if running_job_paths:
            logger.debug('ongoing ' + str(running_job_paths))
    if show_progress and len(jobs) > 1 and not debug_jobs:
        print('', file=stdout)  # finish progress line

    # join all workers
    for w in workers:
        w.done = True
    [w.join() for w in workers]
    return results
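execute_jobs only hands a worker a job once its dependencies have finished: each completed client's path is discarded from every pending job's 'depends' set, and get_ready_job then returns a job whose set is empty. A minimal sketch of that selection idea (not vcstool's actual helper; the 'depends' key mirrors the one used above):

def pick_ready_job(pending_jobs):
    # Return and remove the first job with no unmet dependencies, or None.
    for index, job in enumerate(pending_jobs):
        if not job.get('depends'):
            return pending_jobs.pop(index)
    return None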
Example #60
class Batcher:
    def __init__(self, vocab, bin_path, hps):
        assert os.path.exists(bin_path)
        self.vocab = vocab
        self.bin_path = bin_path
        # bin_fname = args.split_data_path.format(setname).replace('.json', '.bin')
        self.hps = hps
        self.single_pass = 'eval' in hps.mode

        QUEUE_MAX_SIZE = 50
        self.batch_cache_size = 50
        self.batch_queue = Queue(QUEUE_MAX_SIZE)
        self.example_queue = Queue(QUEUE_MAX_SIZE * 16)  # alternatively QUEUE_MAX_SIZE * self.hps.batch_size

        self.example_thread = Thread(target=self.fill_example_queue)
        self.example_thread.daemon = True
        self.example_thread.start()

        self.batch_thread = Thread(target=self.fill_batch_queue)
        self.batch_thread.daemon = True
        self.batch_thread.start()

    def next_batch(self):
        if self.batch_queue.qsize() == 0:
            if self.single_pass:
                print("[*]FINISH decoding")
                return 'FINISH'
            else:
                print("Batch queue is empty. waiting....")
                raise ValueError("Unexpected finish of batching.")
        batch = self.batch_queue.get()
        return batch

    def fill_example_queue(self):
        gen = sample_generator(self.bin_path, self.single_pass)
        while True:
            try:
                if 'nli' in self.bin_path:
                    premise, hypo, label = next(gen)
                    example = Example(premise, hypo, label, None, None, self.vocab, self.hps)
                elif 'main' in self.bin_path:
                    w0, w1, claim, reason, label = next(gen)
                    example = Example(w0, w1, label, claim, reason, self.vocab, self.hps)
                else:
                    raise ValueError

            except Exception as err:
                print("Error while filling example queue (queue size {}): {}".format(self.example_queue.qsize(), err))
                assert self.single_pass
                break
            self.example_queue.put(example)

    def fill_batch_queue(self):
        while True:
            if not self.single_pass:
                assert 'eval' not in self.hps.mode
                inputs = []
                for _ in range(self.hps.batch_size * self.batch_cache_size):
                    inputs.append(self.example_queue.get())

                batches = []
                for idx in range(0, len(inputs), self.hps.batch_size):
                    batches.append(inputs[idx:idx + self.hps.batch_size])
                # Already in the non-single-pass branch, so always shuffle for training.
                shuffle(batches)
                for bat in batches:
                    self.batch_queue.put(Batch(bat, self.hps, self.vocab))
            else:
                assert 'eval' in self.hps.mode
                sample = self.example_queue.get()
                bat = [sample for _ in range(self.hps.batch_size)]
                self.batch_queue.put(Batch(bat, self.hps, self.vocab))
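Batcher is a producer/consumer setup: daemon threads keep bounded queues filled while next_batch() drains them on demand. A self-contained sketch of that pattern with generic names (independent of Example, Batch, and hps):

from queue import Queue
from threading import Thread
import time

q = Queue(maxsize=4)  # a bounded queue applies backpressure to the producer

def producer():
    for item in range(10):
        q.put(item)       # blocks while the queue is full
        time.sleep(0.01)

Thread(target=producer, daemon=True).start()

for _ in range(10):
    print(q.get())        # blocks until an item is available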