Example #1
class FlashForgeIO(Thread):
	_instance = None
	
	def __init__(self, reconnect_timeout=5, vendorid=0x2b71, deviceid=0x0001):
		Thread.__init__(self)
		self.queue = Queue()
	
	def run(self):
		app.logger.info('[FlashforgeIO] started')
		ff = FlashForge()
		while True:
			app.logger.info('[FlashForgeIO] Waiting for next GCode command')
			command = self.queue.get()
			if not command.endswith('\n'):
				command += '\n'
			socketio.emit('terminal', '> ' + command)
			app.logger.info('[FlashForgeIO]  Executing command: {0}'.format(command))
			
			try:
				data = ff.gcodecmd(command)
				if not data.endswith('\n'):
					data += '\n'
				socketio.emit('terminal', '< ' + data)
			except FlashForgeError as error:
				socketio.emit('terminal', 'COMERROR: {0}'.format(error.message))
			finally:
				# Always mark the command as processed so queue.join() cannot hang after an error.
				self.queue.task_done()
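The class above relies on the project's Flask `app`, `socketio` server, and `FlashForge` driver, so it is not runnable on its own. A rough usage sketch, assuming those objects are configured elsewhere and using `M115` purely as an illustrative G-code command:

io_thread = FlashForgeIO()
io_thread.daemon = True           # let the interpreter exit even if the worker is blocked
io_thread.start()
io_thread.queue.put('M115')       # enqueue a G-code command; the worker appends '\n' if needed
io_thread.queue.join()            # block until every queued command has been task_done()'d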
Example #2
class CarCommandHandler(WebSocketHandler):
    need_background = True

    def connect_handler(self, request):
        self.car_id = int(request.match_info['car_id'])
        self.car = core.models.Car.objects.get(id=self.car_id)
        print('websocket connection started for car ID', self.car_id)

        self.command_queue = Queue()

    async def process_msg(self, msg_text: str):
        # print('Got', msg_text, now())
        self.command_queue.put(msg_text)

    async def background(self, ws: web.WebSocketResponse):
        commander = car_connector.Commander(self.car)

        while not ws.closed:
            try:
                command = self.command_queue.get_nowait()
                commander.send_command(command)
                ws.send_str(command)
                # print('Response:', response, now())
                self.command_queue.task_done()
            except Empty:
                await asyncio.sleep(0.01)
        print('Close background for car ID', self.car_id)
Example #3
def multiget(client, keys, **options):
    """
    Executes a parallel-fetch across multiple threads. Returns a list
    containing :class:`~riak.riak_object.RiakObject` instances, or
    3-tuples of bucket, key, and the exception raised.

    :param client: the client to use
    :type client: :class:`~riak.client.RiakClient`
    :param keys: the bucket/key pairs to fetch in parallel
    :type keys: list of two-tuples -- bucket/key pairs
    :rtype: list
    """
    outq = Queue()

    RIAK_MULTIGET_POOL.start()
    for bucket, key in keys:
        task = Task(client, outq, bucket, key, options)
        RIAK_MULTIGET_POOL.enq(task)

    results = []
    for _ in range(len(keys)):
        if RIAK_MULTIGET_POOL.stopped():
            raise RuntimeError("Multi-get operation interrupted by pool "
                               "stopping!")
        results.append(outq.get())
        outq.task_done()

    return results
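The function above fans work out to a pool through one queue and drains a separate output queue, calling task_done() on the output side as each result is consumed. A minimal standalone sketch of that same two-queue pattern, using only the standard library (names here are illustrative, not the riak client's API):

import threading
from queue import Queue

def _fetch(n):
    return n * n                      # stand-in for the real fetch

def parallel_map(items, workers=4):
    inq, outq = Queue(), Queue()

    def worker():
        while True:
            item = inq.get()
            try:
                outq.put(_fetch(item))
            except Exception as exc:  # report failures instead of losing them
                outq.put((item, exc))
            finally:
                inq.task_done()

    for _ in range(workers):
        threading.Thread(target=worker, daemon=True).start()
    for item in items:
        inq.put(item)

    results = []
    for _ in range(len(items)):
        results.append(outq.get())
        outq.task_done()              # mirror the outq.task_done() in multiget()
    return results

print(parallel_map([1, 2, 3, 4]))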
Example #4
    def lookup_statuses(self, min_status, codename, hardware):
        """Scrape statuses/info in from launchpad.net mirror pages"""
        while (self.got["data"] < self.status_num) and self.ranked:
            data_queue = Queue()
            num_threads = self.__queue_lookups(codename, hardware, data_queue)
            if num_threads == 0:
                break
            # Get output of all started thread methods from queue
            progress_msg(self.got["data"], self.status_num)
            for _ in range(num_threads):
                try:
                    # We don't care about timeouts longer than 7 seconds as
                    # we're only getting 16 KB
                    info = data_queue.get(block=True, timeout=7)
                except Empty:
                    pass
                else:
                    data_queue.task_done()
                    if info[1] and info[1]["Status"] in self.status_opts:
                        self.urls[info[0]].update(info[1])
                        self.got["data"] += 1
                        self.top_list.append(info[0])
                        progress_msg(self.got["data"], self.status_num)
                    else:
                        # Remove unqualified results from ranked list so
                        # queueing can use it to populate the right threads
                        self.ranked.remove(info[0])

                if (self.got["data"] == self.status_num):
                    break

            data_queue.join()
Example #5
    def _port_ping(self, hosts: Queue, interface: str, results: set):
        self.logger.debug("{}: Starting TCP SYN ping thread.".format(threading.current_thread().name))

        while True:
            ip = hosts.get()  # type: IPAddress
            ip_str = str(ip)

            # Send SYN with random Src Port for each Dst port
            for dstPort in self.portstoscan:
                srcPort = random.randint(1025, 65534)
                resp = sr1(IP(dst=ip_str) / TCP(sport=srcPort, dport=dstPort, flags=ScapyTCPFlag.SYN), timeout=1,
                           verbose=False,
                           iface=interface)
                if resp and resp.haslayer(TCP):
                    if resp[TCP].flags == (TCPFlag.SYN | TCPFlag.ACK) or resp[TCP].flags == (TCPFlag.RST | TCPFlag.ACK):
                        # Send Reset packet (RST)
                        send(IP(dst=ip_str) / TCP(sport=srcPort, dport=dstPort, flags=ScapyTCPFlag.RST),
                             iface=interface, verbose=False)

                        # We know the port is closed or opened (we got a response), so we deduce that the host exists
                        node = NetworkNode()
                        node.ip = ip
                        node.mac = EUI(resp.src)
                        node.host = resolve_ip(resp[IP].src)
                        results.add(node)

                        self.logger.debug(
                            "Found a live host by pinging port {port_nbr}: {live_host}.".format(port_nbr=dstPort,
                                                                                                live_host=str(node)))

                        # We don't need to test the other ports. We know the host exists.
                        break

            hosts.task_done()
Example #6
class Command(BaseCommand):
    help = "Update the number of words to translate in every task"

    queue = Queue()
    num_threads = 80
    threads = []

    def handle(self, *args, **options):
        self.queue = Queue()
        self.stdout.write('Reading tasks...')
        for it in TransTask.objects.all():
            self.queue.put({'id': it.id, 'num': get_num_words(it.object_field_value)})

        for i in range(self.num_threads):
            t = Thread(target=self.worker_elements)
            t.start()
            self.threads.append(t)
        self.stdout.write("Waiting for empty queue")
        self.queue.join()
        self.stop_threads()

    def stop_threads(self):
        for t in self.threads:
            t.join()
        self.stdout.write('Exiting main thread')

    def worker_elements(self):
        while not self.queue.empty():
            try:
                item = self.queue.get(timeout=2)
            except Empty:
                break
            try:
                TransTask.objects.filter(pk=item['id']).update(number_of_words=item['num'])
            finally:
                # Acknowledge only items that were actually retrieved; calling
                # task_done() after an Empty timeout would raise ValueError.
                self.queue.task_done()
Example #7
class MessageDispatcher(threading.Thread):
    """Simple queue based message dispatcher."""

    def __init__(self):
        threading.Thread.__init__(self)

        self._message_queue = Queue()
        self._stop_event = threading.Event()

    def start(self):
        """ Starts the dispatcher.  """
        if threading.currentThread() == self:
            raise RuntimeError("Cannot call start on the thread itself.")
        threading.Thread.start(self)

    def stop(self):
        """Stops processing the queue.

        Doesn't ensure that the queue is empty before stopping. Pending messages are thrown away.
        Look at _message_queue.join() in future.
        """
        if threading.currentThread() == self:
            raise RuntimeError("Cannot call stop on the thread itself.")
        self._stop_event.set()

    def queue_message(self, message):
        """Queue a Message to be dispatched.

        Args:
            message: message to be dispatched. Type: Message.
        """
        if type(message) is not Message:
            raise TypeError("Only objects of type Message can be queued.")
        if message.signal.receivers:
            logger.info("MessageDispatcher: added message: " +
                        message.kwargs.__str__())
            logger.debug("MessageDispatcher: receivers: " +
                         message.signal.receivers.__str__())
            self._message_queue.put(message)
        else:
            logger.info("MessageDispatcher: skipped message: " +
                        message.kwargs.__str__())

    def run(self):
        """Worker for the message dispatcher thread."""
        while self._stop_event.is_set() is False:
            while self._message_queue.empty() is False:
                message = self._message_queue.get()

                # It is possible that a signal is dispatched to a receiver that
                # was not present when the message was enqueued but is present
                # now. YAGNI call for the moment.
                logger.debug("MessageDispatcher: dispatch message: " +
                             message.kwargs.__str__())
                logger.debug("MessageDispatcher: receivers: " +
                             message.signal.receivers.__str__())
                message.send()
                logger.debug("MessageDispatcher: message dispatched!")
                self._message_queue.task_done()
            self._stop_event.wait(0.01)
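The dispatcher above depends on a project-specific Message/signal type. A minimal standalone sketch of the same stop-Event plus drain loop, using only the standard library (all names here are illustrative):

import threading
from queue import Queue, Empty

class Dispatcher(threading.Thread):
    def __init__(self):
        super().__init__(daemon=True)
        self._queue = Queue()
        self._stop_event = threading.Event()

    def queue_message(self, msg):
        self._queue.put(msg)

    def stop(self):
        self._stop_event.set()

    def run(self):
        while not self._stop_event.is_set():
            try:
                msg = self._queue.get(timeout=0.01)   # bounded wait instead of polling empty()
            except Empty:
                continue
            try:
                print('dispatching', msg)             # stand-in for message.send()
            finally:
                self._queue.task_done()

d = Dispatcher()
d.start()
d.queue_message('hello')
d._queue.join()   # wait for dispatch before stopping (like the original, stop() drops pending messages)
d.stop()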
Example #8
class SampleThread:
    def __init__(self):
        self.q = Queue()
        pass

    def exampleJob(self, worker):
        time.sleep(0.5)

        with print_lock:
            print("name: {}, worker: {}".format(threading.current_thread().name, worker))

    def threader(self):
        while True:
            worker = self.q.get()
            self.exampleJob(worker)
            self.q.task_done()

    def execute(self):
        for x in range(10):
            t = threading.Thread(target = self.threader)
            t.daemon = True
            t.start()

        self.start = time.time()

        for worker in range(20):
            self.q.put(worker)

        self.q.join()
Example #9
class SockSendThread(threading.Thread):
    def __init__(self, group=None, target=None, name=None,
                 args=(), kwargs=None):
        super(SockSendThread, self).__init__(group=group, target=target, name=name)

        (self.conn, self.c) = args
        self.q = Queue()

    def run(self):
        while not self.c.quitf:
            try:
                message = self.q.get(block=False)
            except queue.Empty:
                time.sleep(0.1)
                continue
            try:
                #print("Sent data on socket: " + str(message))
                self.conn.send(message)
                self.q.task_done()
            except Exception as e:
                if '35' in str(e):
                    pass
                else:
                    print(e)
                    break
        # came out of loop
        self.conn.close()
Example #10
class MangaUpdateWorker(QtCore.QThread):
    data_updated = QtCore.pyqtSignal(object)

    def __init__(self, parent):
        super(MangaUpdateWorker, self).__init__()
        self._parent = parent
        self.q = Queue()
        self.thread().daemon = True
        self._abort = False

    def abort(self):
        self._abort = True

    def push(self, title):
        self.q.put(title)

    def run(self):
        while not self.q.empty() and not self._abort:
            title = self.q.get()
            self.update_manga(title)
            self.q.task_done()
        self.data_updated.emit('\n{} Update Completed {}'.format('-' * 21, '-' * 21))

    def update_manga(self, title):
        factor = 60 - (len(title) + 2)
        self.data_updated.emit('{}: {}\n'.format(title, '-' * factor))
        chapters = Library.update_manga_by_title(title)
        if len(chapters) > 0:
            for chapter in chapters:
                self.data_updated.emit('     {}\n'.format(chapter.title))
Example #11
class Sound(hass.Hass):

  def initialize(self):
    
    # Create Queue
    self.queue = Queue(maxsize=0)

    # Create worker thread
    t = Thread(target=self.worker)
    t.daemon = True
    t.start()
    
    self.event = Event()
    
  def worker(self):
    active = True
    while active:
      try:
        # Get data from queue
        data = self.queue.get()
        if data["type"] == "terminate":
          active = False
        else:
          # Save current volume
          volume = self.get_state(self.args["player"], attribute="volume_level")
          # Set to the desired volume
          self.call_service("media_player/volume_set", entity_id = self.args["player"], volume_level = data["volume"])
          if data["type"] == "tts":
            # Call TTS service
            self.call_service("tts/amazon_polly_say", entity_id = self.args["player"], message = data["text"])
          if data["type"] == "play":
            netpath = 'http://{}:{}/local/{}/{}'.format(self.args["ip"], self.args["port"], self.args["base"], data["path"])
            self.call_service("media_player/play_media", entity_id = self.args["player"], media_content_id = netpath, media_content_type = data["content"])

          # Sleep to allow message to complete before restoring volume
          time.sleep(int(data["length"]))
          # Restore volume
          self.call_service("media_player/volume_set", entity_id = self.args["player"], volume_level = volume)
          # Set state locally as well to avoid race condition
          self.set_state(self.args["player"], attributes = {"volume_level": volume})
      except Exception:
        self.log("Error")
        self.log(sys.exc_info())

      # Rinse and repeat
      self.queue.task_done()
      
    self.log("Worker thread exiting")
    self.event.set()
       
  def tts(self, text, volume, length):
    self.queue.put({"type": "tts", "text": text, "volume": volume, "length": length})
    
  def play(self, path, content, volume, length):
    self.queue.put({"type": "play", "path": path, "content": content, "volume": volume, "length": length})

  def terminate(self):
    self.event.clear()
    self.queue.put({"type": "terminate"})
    self.event.wait()
Example #12
class Banker:
    number_of_bankers = 0
    active_bankers = []

    def __init__(self):
        self.q = Queue()
        self.currently_serving = None
        self.active = True
        self.number = Banker.number_of_bankers = Banker.number_of_bankers + 1
        Banker.active_bankers.append(self)

    def serve(self, person):
        # serve client - here we only pretend to do work by calling sleep
        time.sleep(person.time_for_service)
        Observer.notify()

    def do_banky_stuff(self):
        # take the person at top of own queue and serve it
        while self.active:
            item = self.q.get()
            self.currently_serving = item
            self.serve(item)
            self.currently_serving = None
            self.q.task_done()

    def stop(self):
        print("Banker no. " + str(self.number) + ": I quit this damn job!")
        Banker.active_bankers.remove(self)
        self.active = False
        Regulator.spread_queue(self)
Example #13
class HTTPCauldronRequestHandler(BaseHTTPRequestHandler):

	server_version = "Extremon/0.1"

	def do_GET(self):
		self.outq=Queue(maxsize=10)
		self.running=True
		self.server.add_consumer(self)

		self.send_response(200)
		self.send_header("Content-type", "text/plain")
		self.end_headers()
		self.running=True

		try:
			while self.running:
				try:
					self.wfile.write(self.outq.get())
					self.outq.task_done()
				except error:
					self.running=False
		finally:
			self.server.remove_consumer(self)	

	def write(self,data):
		try:
			self.outq.put(data,block=False)
		except Full:
			pass
Example #14
class BaseCrawler:

    def __init__(self, settings):
        self.host = settings.HOST
        self.daemons = settings.DAEMONS_COUNT
        self.queue = Queue()
        self.last_requests = deque([time.time()], maxlen=settings.RPS)

        self._spawn_daemons()

    def _spawn_daemons(self):
        for _ in range(self.daemons):
            t = threading.Thread(target=self._daemon_worker)
            t.daemon = True
            t.start()

    def _daemon_worker(self):
        conn = client.HTTPSConnection(self.host)
        while True:
            current_time = time.time()
            if (current_time - self.last_requests[0]) < 1.:
                continue
            self.last_requests.append(current_time)
            item = self.queue.get()
            self.callback(item, conn)
            self.queue.task_done()

    def callback(self, item, conn):
        raise NotImplementedError

    def process(self):
        raise NotImplementedError
Example #15
class StdInCauldronRequestHandler(Thread):

	def __init__(self,server,path):
		Thread.__init__(self,name='StdInCauldronRequestHandler %s' % (path))
		self.daemon=True
		self.server=server
		self.path=path
	
	def run(self):
		self.outq=Queue(maxsize=10)
		self.server.add_consumer(self)

		try:
			self.process=Popen(self.path,stdin=PIPE,stdout=PIPE,stderr=PIPE)
			self.running=True
			print("StdInCauldronRequestHandler %s running" % (self.path))
			while self.running:
				try:
					self.process.stdin.write(self.outq.get())
					self.outq.task_done()
				except error:
					self.running=False
		finally:
			self.server.remove_consumer(self)	

	def write(self,data):
		try:
			self.outq.put(data,block=False)
		except Full:
			pass

	def stop(self):
		self.running=False
		self.process.terminate()
		self.process.wait()
Example #16
class Comparisons:
    def __init__(self, query):
        self.workers = Queue()
        self.query = query

    def __call__(self):
        return self.run()

    def run(self):
        slaves_thread = Thread(target=self.slaves)
        slaves_thread.start()
        return self.master()

    def master(self):
        count = 0
        while True: # not workers.empty()
            worker = self.workers.get()
            item = worker.join()
            self.workers.task_done()
            if item is False:
                print('All done!!')
                break
            if item is not None:
                json_item = json.dumps(item)
                yield json_item

    def slaves(self):
        cars = GetCars(self.query)
        for car in cars():
            car_appraisal = AppraiseCar(car)
            worker = ThreadWithReturnValue(target=car_appraisal.run)
            worker.start()
            self.workers.put(worker)
Example #17
class Work:
    def __init__(self, threads = 100, offset = 0, timeout = 15.0):
        self.lines = Lines(f=_DOMAINS_FILE)
        self.num_lines = len(self.lines.d)
        self.concurrent = threads
        self.q = Queue(self.concurrent * 2)
        for self.i in range(self.concurrent):
            self.t = Thread(target=self.doWork)
            self.t.daemon = True
            self.t.start()
        try:
            for self.i in range(offset, self.num_lines):
                self.q.put(self.i)
            self.q.join()
        except KeyboardInterrupt:
            sys.exit(1)

    def doWork(self):
        while True:
            n = int(self.q.get())
            _domain = self.lines.find_line(n=n)

            c = Check(url = _domain)
            ip = c.ip
            if ip:
                print(ip, end = ' ')
                print(_domain)
                save(ip=ip, domain=_domain)

            self.q.task_done()
Example #18
class TaskManager:

    # Initialise the object
    def __init__(self):
        self.q = Queue(maxsize=0)
        thread = threading.Thread(target=self.loop)
        thread.start()

    # Continuously loop through tasks that need to be completed
    def loop(self):
        # General Application loop
        while True:
            mytime = str(time.time())
            thistask = self.q.get()
            # Lets check what type of task we have
            for case in Switch(thistask['type']):
                # Create a new node
                if case('parse_submission'):
                    cherrypy.config['model'].parse_submission(thistask['data'])
                    break
                # A test so we can easily track things
                if case('test'):
                    break
            logging.debug("COMPLETED "+thistask['type']+" UID: "+thistask['uid']+" START:"+thistask['start']+" FIN: "+mytime)
            self.q.task_done()
    
    # Add a new task to the queue in the form of: {'type':'create_node','data':data}
    def add(self, task):
        task['uid'] = str( uuid.uuid1() )
        logging.debug('ADDED NEW TASK TO THE QUEUE: '+task['uid'])
        task['start'] = str(time.time())
        self.q.put(task)
        return task['uid']
Example #19
class Cloner:
    '''A worker with its own queue for cloning repositories located at the
    submitted URLs.

    '''
    def __init__(self, handler):
        '''Build a new cloner. The handler argument should be a function that
        accepts two arguments, repo and code.

        '''
        self.work_queue = Queue()
        self.handler = handler
        self.worker = Thread(target=self._work)
        self.worker.setDaemon(True)
        self.worker.start()
    
    def add_work(self, repo):
        '''Add a repository to be cloned.'''
        self.work_queue.put(repo)
        
    def _work(self):
        '''Loop forever, pulling work off the work_queue to clone github
        repositories and calling the handler upon each finished
        clone.

        '''
        while True:
            # A blocking get() avoids spinning the CPU while the queue is empty.
            next_url = self.work_queue.get()
            code = mkdirs_clone(next_url).wait()
            self.work_queue.task_done()
            self.handler(next_url, code)
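A hedged usage sketch of the Cloner above; it is not runnable on its own because it relies on the project's `mkdirs_clone` helper, and the handler signature (repo, code) follows the constructor docstring. The URL is hypothetical:

def on_cloned(repo, code):
    print('cloned', repo, 'exit code', code)

cloner = Cloner(on_cloned)
cloner.add_work('https://github.com/example/example.git')
cloner.work_queue.join()   # block until the queued clone has been processed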
Example #20
class Mission:
    def __init__(self, max_thread):
        self.queue = Queue()
        self.max_thread = max_thread

    def __enter__(self):
        for x in range(self.max_thread):
            thread = Thread(target=self._threader)
            thread.daemon = True
            thread.start()
        return self

    def __exit__(self, exception_type, exception_value, traceback):
        self.queue.join()

    def send_task(self, func, *args):
        self.queue.put((func, args))

    def _threader(self):
        while True:
            func, args = self.queue.get()
            try:
                func(*args)
            finally:
                # Always mark the task done so __exit__'s queue.join() cannot hang
                # if func raises. (A blocking get() never raises queue.Empty.)
                self.queue.task_done()
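Because __exit__ calls queue.join(), the context manager above blocks until every submitted task has been task_done()'d. A small usage sketch, assuming the module imports `Thread` and `Queue` as the class expects:

def greet(name):
    print('hello', name)

with Mission(max_thread=4) as mission:
    for name in ('alice', 'bob', 'carol'):
        mission.send_task(greet, name)
# leaving the block waits for all queued tasks to complete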
Example #21
class JQueryChaliceRequestHandler(BaseHTTPRequestHandler):

	server_version = "Extremon/0.1"

	def do_GET(self):
		self.outq=Queue(maxsize=10)
		self.running=True
		self.server.add_consumer(self)

		self.send_response(200)
		self.send_header("Content-type", "text/plain")
		self.send_header("Access-Control-Allow-Origin", "*")
		self.end_headers()
		self.missed=0
		self.running=True

		try:
			while self.running:
				try:
					message = self.outq.get() + bytes('%s.timestamp=%.2f\n%s.missed=%d\n\n' % (self.server.prefix,time.time(),self.server.prefix,self.missed),'UTF-8')
					self.wfile.write(bytes(str(len(message)) + ";", 'UTF-8'))
					self.wfile.write(message)
					self.wfile.write(b';')
					self.outq.task_done()
				except error:
					self.running=False
		finally:
			self.server.remove_consumer(self)

	def write(self,data):
		try:
			self.outq.put(data,block=False)
		except Full:
			self.missed+=1
Example #22
def craigsgenerator(sites = None, sections = None, listings = _listings,
                    cachedir = 'craigslist', scheme = 'https',
                    get = requests.get,
                    threads_per_section = 10, superthreaded = superthreaded):
    '''
    These parameters limit what pages will be downloaded; if you use the defaults, all pages will be downloaded.
        sites: An iterable of Craigslist sites to download (like "boston.craigslist.org")
        sections: An iterable of Craigslist sections to download (like "roo" or "sub")

    The rest of the parameters relate to the manner of download.
        cachedir (str): Where should downloads (pickled Response objects) be stored?
        scheme (str): "https" or "http"
        get: a function that takes a url and returns a Response object
        threads_per_section (int): How many threads to run within each particular craigslist section, by site
        superthreaded (bool): Whether to run each craigslist site in a different thread

    Output:
        A generator of dictionaries
    '''
    sleep_interval = 1
    kwargs = {
        'cachedir': cachedir, 'scheme': scheme,
        'get': get,
    }

    if sites is None:
        kwargs_sites = dict(kwargs)
        del(kwargs_sites['scheme'])
        sites = _sites(**kwargs_sites)
    if sections is None:
        sections = _sections(**kwargs)

    warehouse = Warehouse(os.path.join(cachedir, 'listings'))
    def get_listings(site, section):
        return listings(scheme, get, threads_per_section, warehouse, site, section,
                        parse.listing, parse.search, parse.next_search_url,
                        download, datetime.datetime.today)

    if not superthreaded:
        for site in sites:
            for section in sections:
                for listing in get_listings(site, section):
                    yield listing

    else:
        results = Queue()
        def sink_listings(site,section):
            for listing in get_listings(site,section):
                results.put(listing)
            logger.info('Finished %s/%s' % (site,section))

        with f.ThreadPoolExecutor(threads_per_section) as e:
            futures = {}
            for site in sites:
                for section in sections:
                    futures[(site,section)] = e.submit(sink_listings, site, section)
            while not (all(future.done() for future in futures.values()) and results.empty()):
                yield results.get()
                results.task_done()
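A hedged usage sketch of the generator above, following the docstring's parameter descriptions; the site and section values are only illustrative, and downloads are cached under `cachedir`:

for listing in craigsgenerator(sites=['boston.craigslist.org'],
                               sections=['sub'],
                               cachedir='craigslist',
                               superthreaded=False):
    print(listing)        # each yielded item is a dictionary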
Example #23
class Controller():
    def __init__(self):
        self.sending_flag = True
        self.conf = {}
        self.network = None
        self.queue = Queue()
        self.world = World(self.queue)
        self.client = AI()
        self.argNames = ["AICHostIP", "AICHostPort", "AICToken", "AICRetryDelay"]
        self.argDefaults = ["127.0.0.1", 7099, "00000000000000000000000000000000", "1000"]

    def start(self):
        self.read_settings()
        self.network = Network(ip=self.conf[self.argNames[0]],
                               port=self.conf[self.argNames[1]],
                               token=self.conf[self.argNames[2]],
                               message_handler=self.handle_message)
        self.network.connect()

        def run():
            while self.sending_flag:
                event = self.queue.get()
                self.queue.task_done()
                message = {
                    'name': Event.EVENT,
                    'args': [{'type': event.type, 'args': event.args}]
                }
                self.network.send(message)

        Thread(target=run, daemon=True).start()

    def terminate(self):
        print("finished!")
        self.network.close()
        self.sending_flag = False

    def read_settings(self):
        if os.environ.get(self.argNames[0]) is None:
            for i in range(len(self.argNames)):
                self.conf[self.argNames[i]] = self.argDefaults[i]
        else:
            for i in range(len(self.argNames)):
                self.conf[self.argNames[i]] = os.environ.get(self.argNames[i])

    def handle_message(self, message):
        if message[Constants.KEY_NAME] == Constants.MESSAGE_TYPE_INIT:
            self.world.handle_init_message(message)
        elif message[Constants.KEY_NAME] == Constants.MESSAGE_TYPE_TURN:
            self.world.handle_turn_message(message)
            self.do_turn()
        elif message[Constants.KEY_NAME] == Constants.MESSAGE_TYPE_SHUTDOWN:
            self.terminate()

    def do_turn(self):

        def run():
            self.client.do_turn(self.world)

        Thread(target=run, daemon=True).start()
Example #24
class Hasher(object):
    '''
    Returns the Hash of a file,
    uses Cache when possible.
    '''

    def __init__(self):
        '''
        There must only be one Hasher at a time.
        '''
        self.toHash = Queue()
        for i in range(config().workerThreads):
            self.hashWorker = Thread(target=self.createHashWorker)
            self.hashWorker.daemon = True
            self.hashWorker.name = 'Hash Worker #' + str(i)
            self.hashWorker.start()

    def hashFile(self, file, sync=True):
        try:
            cachedFile = cache().get(file)
            if cachedFile.mtime == file.mtime and cachedFile.size == file.size:
                return cachedFile
            else:
                raise FileChangedError()
        except (NotInCacheError, FileChangedError, ItemVersionMissmatchError):
            if not sync:
                logger.debug('Adding file to Queue: ' + str(file))
                self.toHash.put(file)
                return file
            else:
                file = self.createHash(file)
                cache().add(file)
                return file

    def createHashWorker(self):
        while True:
            file = self.toHash.get(block=True)
            try:
                cache().get(file)
            except (NotInCacheError, ItemVersionMissmatchError):
                try:
                    file = self.createHash(file)
                    cache().add(file)
                except IOError as e:
                    self.toHash.put(file)
                    logger.error(file.path + ': ' + str(e))
                    time.sleep(10)
            finally:
                self.toHash.task_done()

    @staticmethod
    def createHash(file):
        logger.debug('Calculating new hash for ' + file.path)
        sha1 = hashlib.sha1()
        with open(file.path, 'rb') as data:
            for chunk in iter(lambda: data.read(128 * sha1.block_size), b''):
                sha1.update(chunk)
        file.hash = sha1.hexdigest()
        return file
Example #25
class TaskManager(object):
    """ The `TaskManager` is a singleton that manages the threads
    used to parallelize processing and the queue that manages the
    current set of prepared tasks. """

    def __init__(self, threads=10, max_queue=200, daemon=True):
        """
        :param threads: The number of threads to be spawned. Values
            ranging from 5 to 40 have shown useful, based on the amount
            of I/O involved in each task.
        :param max_queue: How many queued items should be read from the
            generator and put on the queue before processing is halted
            to allow the processing to catch up.
        :param daemon: Mark the worker threads as daemons in the
            operating system, so that they will not be included in the
            number of application threads for this script.
        """
        self.num_threads = threads
        self.max_queue = max_queue
        self.daemon = daemon
        self.queue = None

    def _spawn(self):
        """ Initialize the queue and the threads. """
        self.queue = Queue(maxsize=self.max_queue)
        for i in range(self.num_threads):
            t = Thread(target=self._consume)
            t.daemon = self.daemon
            t.start()

    def _consume(self):
        """ Main loop for each thread, handles picking a task off the
        queue, processing it and notifying the queue that it is done.
        """
        while True:
            try:
                task, args, kwargs = self.queue.get(True)
                task(*args, **kwargs)
            finally:
                self.queue.task_done()

    def put(self, task, args, kwargs):
        """ Add a new item to the queue. An item is a task and the
        arguments needed to call it.

        Do not call this directly, use Task.queue/Task.run instead.
        """
        if self.queue is None:
            self._spawn()
        self.queue.put((task, args, kwargs))

    def wait(self):
        """ Wait for each item in the queue to be processed. If this
        is not called, the main thread will end immediately and none
        of the tasks assigned to the threads would be executed. """
        if self.queue is None:
            return
        self.queue.join()
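The `max_queue` parameter works because Queue(maxsize=...) makes put() block once the queue is full, pausing the producer until the workers catch up. A tiny standalone illustration of that backpressure, unrelated to the project's Task class:

import threading
import time
from queue import Queue

q = Queue(maxsize=2)          # producer blocks once two items are waiting

def worker():
    while True:
        item = q.get()
        time.sleep(0.1)       # simulate slow I/O
        print('processed', item)
        q.task_done()

threading.Thread(target=worker, daemon=True).start()

for i in range(6):
    q.put(i)                  # blocks whenever the queue already holds 2 items
    print('queued', i)
q.join()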
Example #26
class Fetcher:
    def __init__(self,threads,subject):
        self.opener = urllib.request.build_opener(urllib.request.HTTPHandler)
        self.lock = Lock()
        self.q_req = Queue()
        self.q_ans = Queue()
        self.threads = threads
        self.subject = subject
        for i in range(threads):
            t = Thread(target=self.threadget, args=(subject,))
            t.setDaemon(True)
            t.start()
        self.running = 0

    def __del__(self):
        time.sleep(0.5)
        self.q_req.join()
        self.q_ans.join()

    def taskleft(self):
        return self.q_req.qsize()+self.q_ans.qsize()+self.running

    def push(self, req):
        self.q_req.put(req)

    def pop(self, ans):
        return self.q_ans.get()

    def download_imag(self, subject):
        global count
        s = requests.session()
        imag = s.get(subject['cover'])
        name = subject['title']
        path = '/users/peibibing/PycharmProjects/douban/douban_movie/%s.jpg'%name
        with open(path,'wb') as f:
            f.write(imag.content)
        count += 1
        print(count)
        return 'ok'

    def threadget(self,sub):
        while True:
            req = self.q_req.get()
            with self.lock:  # ensure the counter update is atomic
                self.running += 1
            try:
                # ans = download_imag(sub)

                ans = self.opener.open(req).read()
            except Exception:
                ans = 'error'
                print(ans)
            self.q_ans.put((req,ans))
            with self.lock:
                self.running -= 1
            self.q_req.task_done()
            time.sleep(0.1)
Example #27
class SuperPing(object):
    def __init__(self):
        # count how many IPs were pinged and how many responded
        self.addr_resp = 0
        self.addr_pinged = 0
        # Set a print lock
        self.print_lock = threading.Lock()
        # setting up empty queue
        self.q = Queue()

    def main(self, netip):
        import time

        # create a list that holds all IPs to be pinged
        network = []

        # verify user input
        try:
            network = ipaddress.IPv4Network(netip)
        except ValueError:
            print("Error the IP Address/Netmask is invalid for IPv4:", netip)
        except KeyboardInterrupt:
            sys.exit(0)

        # Creating threads (by changing the number in range you will increase amount of threads)
        for i in range(256):
            t = threading.Thread(target=self.threader)
            t.daemon = True
            t.start()

        self.start = time.time()

        # building the queue
        for ip in network:
            self.q.put(ip)

        self.q.join()

        elapsed = str(time.time() - self.start)
        print("SuperPing finished:", self.addr_pinged, "IP addresses pinged (", self.addr_resp, "responded )", "entire job took -",
              elapsed[:3], "sec")

    # the actual function to ping the host from the queue
    def sping(self, ip):
        status, result = subprocess.getstatusoutput("ping -c1 -w1 " + str(ip))
        self.addr_pinged += 1
        if status == 0:
            self.addr_resp += 1
            with self.print_lock:
                print("Host", ip, "responded")

    # instruction for each worker
    def threader(self):
        while True:
            ip = self.q.get()
            self.sping(ip)
            self.q.task_done()
Example #28
class WorkerQueue(object):

    def __init__(self, num_workers = 20):
        self.queue = Queue()
        self.pool = []
        self._setup_workers(num_workers)

    def _setup_workers(self, num_workers):
        """ Sets up the worker threads
              NOTE: undefined behaviour if you call this again.
        """
        self.pool = []

        for _ in range(num_workers):
            self.pool.append(Thread(target=self.threadloop))

        for a_thread in self.pool:
            a_thread.setDaemon(True)
            a_thread.start()


    def do(self, f, *args, **kwArgs):
        """ puts a function on a queue for running later.
        """
        self.queue.put((f, args, kwArgs))


    def stop(self):
        """ Stops the WorkerQueue, waits for all of the threads to finish up.
        """
        self.queue.put(STOP)
        for thread in self.pool:
            thread.join()


    def threadloop(self): #, finish = False):
        """ Loops until all of the tasks are finished.
        """
        while True:
            args = self.queue.get()
            if args is STOP:
                self.queue.put(STOP)
                self.queue.task_done()
                break
            else:
                try:
                    args[0](*args[1], **args[2])
                finally:
                    # clean up the queue, raise the exception.
                    self.queue.task_done()
                    #raise


    def wait(self):
        """ waits until all tasks are complete.
        """
        self.queue.join()
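A short usage sketch of the WorkerQueue above, assuming STOP is a module-level sentinel object (e.g. STOP = object()) as the stop() method implies:

wq = WorkerQueue(num_workers=4)
for i in range(10):
    wq.do(print, 'job', i)    # any callable plus its arguments
wq.wait()                     # block until every queued call has been task_done()'d
wq.stop()                     # push the STOP sentinel and join the worker threads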
Example #29
class ThreadedGameHandler:
    """
    The :class:`ThreadedGameHandler` class is used to be able to run multiple learners on multiple emulator instances.
    It uses :class:'GameHandler' to communicate between the ALE and learner

    Parameters
    ----------
    rom : byte string
        Specifies the directory to load the rom from. Must be a byte string: b'dir_for_rom/rom.bin'
    show_rom : boolean
        Whether or not to show the game being played. True takes longer to run but can be fun to watch
    skip_frame : int
        Number of frames to skip using the last action chosen
    num_emulators : int
        Number of emulators/threads to setup and run on
    """
    def __init__(self, rom, show_rom, skip_frame, num_emulators):
        # setup list of gamehandlers and their locks
        self.emulators = list()
        for emu in range(num_emulators):
            self.emulators.append((GameHandler(rom, show_rom, skip_frame), threading.Lock()))

        # setup thread queue
        self.queue = Queue()

        # lock for unlocking/locking emulators
        self.emulator_lock = threading.Lock()
        self.current_emulator = 0
        self.num_emulators = num_emulators

    def async_run_emulator(self, learner, done_fn):
        # push to queue
        self.queue.put(self._get_next_emulator())
        t = threading.Thread(target=self._thread_run_emulator, args=(learner, done_fn))
        t.daemon = True
        t.start()

    def _thread_run_emulator(self, learner, done_fn):
        # get an emulator
        emulator, emulator_lock = self.queue.get()
        with emulator_lock:
            total_reward = emulator.run_one_game(learner)
        done_fn(total_reward)
        self.queue.task_done()

    def block_until_done(self):
        self.queue.join()

    def _get_next_emulator(self):
        with self.emulator_lock:
            emulator = self.emulators[self.current_emulator]
            self.current_emulator += 1
            self.current_emulator %= self.num_emulators
        return emulator

    def get_legal_actions(self):
        return self.emulators[0][0].get_legal_actions()
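A hedged usage sketch based on the docstring; the rom path, learner objects, and callback are hypothetical and would come from the surrounding project:

handler = ThreadedGameHandler(rom=b'roms/breakout.bin', show_rom=False,
                              skip_frame=4, num_emulators=4)

def on_done(total_reward):
    print('episode finished with reward', total_reward)

for learner in learners:                      # learners defined elsewhere in the project
    handler.async_run_emulator(learner, on_done)
handler.block_until_done()                    # joins the queue once every game has run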
Example #30
File: pingit.py Project: binhvq/nms
class PingThem():
    def __init__(self, targets, maxthreads=100):
        self.q1 = Queue(maxsize=0)
        self.q2 = Queue(maxsize=0)
        self.maxthreads = maxthreads if len(targets) >= maxthreads else len(targets)
        

        for target in targets:
            self.q1.put(target)
        logging.info("Done adding all targets")

        print(self.q1.qsize())


    def worker(self):
        while 1:
            i = self.q1.get()
            # logging.info("Got value from queue: {0}".format(i))
            # quit cond
            if i is None:
                break

            p = PingIt()
            r = p.doping(i)

            self.q2.put(r)

            self.q1.task_done()

    def run(self):
        print("Will start {0} threads for checking ...".format(self.maxthreads))
        allts = []
        for i in range(self.maxthreads):
            t = Thread(target=self.worker)
            t.start()
            allts.append(t)

        self.q1.join()

        for i in range(self.maxthreads):
            self.q1.put(None)

        for t in allts:
            t.join()

        # check q2
        logging.info(self.q2.qsize())

        ret = []
        for j in range(self.q2.qsize()):
            i = self.q2.get()
            if i is None:
                break
            ret.append(i)

        return ret
Example #31
class QiushiSpider():
    def __init__(self):
        self.headers = {
            'User-Agent':
            'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'
        }

        self.URL_q = Queue()
        self.HTML_q = Queue()
        self.RESULT_q = Queue()

    def get_url_list(self):
        # put every page URL into the URL queue
        for i in range(1, 14):
            self.URL_q.put(
                'https://www.qiushibaike.com/8hr/page/{}/'.format(i))

    def get_html(self):
        while True:
            url = self.URL_q.get()
            resp = requests.get(url, headers=self.headers)
            self.HTML_q.put(etree.HTML(resp.text))

            self.URL_q.task_done()  # decrement the unfinished-task count

    def get_item(self):
        """从响应的队列中取出数据"""
        while True:
            html = self.HTML_q.get()
            div_list = html.xpath(
                '//div[@class="article block untagged mb15 typs_recent"]')
            # extract data from each group
            result_list = []

            for div in div_list:
                item = {}
                item['name'] = div.xpath('.//h2/text()')[0]
                item['content'] = div.xpath(
                    './/div[@class="content"]/span/text()')[0]

                result_list.append(item)
                print(len(result_list))

            self.RESULT_q.put(result_list)
            self.HTML_q.task_done()

    def save_result(self):
        while True:
            result_list = self.RESULT_q.get()
            for item in result_list:
                print(item)
            self.RESULT_q.task_done()

    def run(self):
        self.get_url_list()

        # t_html = Thread(target=self.get_html)
        # t_html.setDaemon(True)  # daemon thread: ends when the main thread ends
        # t_html.start()
        #
        # t_result = Thread(target=self.get_item)
        # t_result.start()
        #
        # t_save = Thread(target=self.save_result)
        # t_save.start()

        t_list = []
        for i in range(3):
            t_html = Thread(target=self.get_html)
            t_list.append(t_html)
        for i in range(10):
            t_result = Thread(target=self.get_item)
            t_list.append(t_result)
        t_save = Thread(target=self.save_result)
        t_list.append(t_save)

        for t in t_list:
            # daemon threads: they exit when the main thread exits
            t.setDaemon(True)
            t.start()

        for q in [self.URL_q, self.HTML_q, self.RESULT_q]:
            q.join()  # block the current (main) thread until every task in the queue is done
Example #32
class TaskIO(object):
    """Object used to stream I/O between a
    running Mesos task and the local terminal.

    :param task: task ID
    :type task: str
    :param cmd: a command to launch inside the task's container
    :type cmd: str
    :param args: Additional arguments for the command
    :type args: str
    :param env: List of environment variables to enrich the shell with (NAME=value, colon separated)
    :type env: str
    :param interactive: whether to attach STDIN of the current
                        terminal to the new command being launched
    :type interactive: bool
    :param tty: whether to allocate a tty for this command and attach
                the local terminal to it
    :type tty: bool
    """

    # The interval to send heartbeat messages to
    # keep persistent connections alive.
    HEARTBEAT_INTERVAL = 30
    HEARTBEAT_INTERVAL_NANOSECONDS = HEARTBEAT_INTERVAL * 1000000000

    def __init__(self, agent_url, container_id, parent_container_id=None, user=None, cmd=None,
                 env=None,
                 args=None, interactive=False, tty=False):
        # Store relevant parameters of the call for later.
        self.cmd = cmd
        self.env = env
        self.interactive = interactive
        self.tty = tty
        self.args = args

        # Get the URL to the agent running the task.
        self.agent_url = urllib.parse.urljoin(agent_url, 'api/v1')

        # Grab a reference to the container ID for the task.
        self.parent_id = container_id
        self.parent_container_id = parent_container_id
        self.user = user

        # Generate a new UUID for the nested container
        # used to run commands passed to `task exec`.
        self.container_id = str(uuid.uuid4())

        # Set up a recordio encoder and decoder
        # for any incoming and outgoing messages.
        self.encoder = recordio.Encoder(
            lambda s: bytes(json.dumps(s, ensure_ascii=False), "UTF-8"))
        self.decoder = recordio.Decoder(
            lambda s: json.loads(s.decode("UTF-8")))

        # Set up queues to send messages between threads used for
        # reading/writing to STDIN/STDOUT/STDERR and threads
        # sending/receiving data over the network.
        self.input_queue = Queue()
        self.output_queue = Queue()

        # Set up an event to block attaching
        # input until attaching output is complete.
        self.attach_input_event = threading.Event()
        self.attach_input_event.clear()

        # Set up an event to block printing the output
        # until an attach input event has successfully
        # been established.
        self.print_output_event = threading.Event()
        self.print_output_event.clear()

        # Set up an event to block the main thread
        # from exiting until signaled to do so.
        self.exit_event = threading.Event()
        self.exit_event.clear()

        # Use a class variable to store exceptions thrown on
        # other threads and raise them on the main thread before
        # exiting.
        self.exception = None

    def run(self):
        """Run the helper threads in this class which enable streaming
        of STDIN/STDOUT/STDERR between the CLI and the Mesos Agent API.

        If a tty is requested, we take over the current terminal and
        put it into raw mode. We make sure to reset the terminal back
        to its original settings before exiting.
        """

        # Without a TTY.
        if not self.tty:
            try:
                self._start_threads()
                self.exit_event.wait()
            except Exception as e:
                self.exception = e

            if self.exception:
                raise self.exception
            return

        # With a TTY.
        if util.is_windows_platform():
            raise MesosException(
                "Running with the '--tty' flag is not supported on windows.")

        if not sys.stdin.isatty():
            raise MesosException(
                "Must be running in a tty to pass the '--tty flag'.")

        fd = sys.stdin.fileno()
        oldtermios = termios.tcgetattr(fd)

        try:
            if self.interactive:
                tty.setraw(fd, when=termios.TCSANOW)
                self._window_resize(signal.SIGWINCH, None)
                signal.signal(signal.SIGWINCH, self._window_resize)

            self._start_threads()
            self.exit_event.wait()
        except Exception as e:
            self.exception = e

        termios.tcsetattr(
            sys.stdin.fileno(),
            termios.TCSAFLUSH,
            oldtermios)

        if self.exception:
            raise self.exception

    def _thread_wrapper(self, func):
        """A wrapper around all threads used in this class

        If a thread throws an exception, it will unblock the main
        thread and save the exception in a class variable. The main
        thread will then rethrow the exception before exiting.

        :param func: The start function for the thread
        :type func: function
        """
        try:
            func()
        except Exception as e:
            self.exception = e
            self.exit_event.set()

    def _start_threads(self):
        """Start all threads associated with this class
        """
        if self.interactive:
            # Collects input from STDIN and puts
            # it in the input_queue as data messages.
            thread = threading.Thread(
                target=self._thread_wrapper,
                args=(self._input_thread,))
            thread.daemon = True
            thread.start()

            # Prepares heartbeat control messages and
            # puts them in the input queue at a specific
            # heartbeat interval.
            thread = threading.Thread(
                 target=self._thread_wrapper,
                 args=(self._heartbeat_thread,))
            thread.daemon = True
            thread.start()

            # Opens a persistent connection with the mesos agent and
            # feeds it both control and data messages from the input
            # queue via ATTACH_CONTAINER_INPUT messages.
            thread = threading.Thread(
                 target=self._thread_wrapper,
                 args=(self._attach_container_input,))
            thread.daemon = True
            thread.start()

        # Opens a persistent connection with a mesos agent, reads
        # data messages from it and feeds them to an output_queue.
        thread = threading.Thread(
            target=self._thread_wrapper,
            args=(self._launch_nested_container_session,))
        thread.daemon = True
        thread.start()

        # Collects data messages from the output queue and writes
        # their content to STDOUT and STDERR.
        thread = threading.Thread(
            target=self._thread_wrapper,
            args=(self._output_thread,))
        thread.daemon = True
        thread.start()

    def _launch_nested_container_session(self):
        """Sends a request to the Mesos Agent to launch a new
        nested container and attach to its output stream.
        The output stream is then sent back in the response.
        """

        message = {
            'type': "LAUNCH_NESTED_CONTAINER_SESSION",
            'launch_nested_container_session': {
                'container_id': {
                    'parent': { 'value': self.parent_id },
                    'value': self.container_id
                },
                'command': {
                    'value': self.cmd,
                    'arguments': [self.cmd] + self.args,
                    'shell': False}}}
        # If we have to launch in a task group, we need double nesting
        if self.parent_container_id is not None:
            message['launch_nested_container_session']['container_id']['parent']['parent'] = { 'value': self.parent_container_id }
        if self.env is not None:
            env_vars = []
            env_var_regex = re.compile('^([A-Z_][A-Z0-9_]*)=(.*)$')
            for env_var in self.env.split(':'):
                matches = env_var_regex.match(env_var)
                if matches and len(matches.groups()) == 2:
                    env_vars.append({
                        'name': matches.group(1),
                        'type': 'VALUE',
                        'value': matches.group(2)
                    })
            message['launch_nested_container_session']['command']['environment'] = {
                'variables': env_vars
            }

        if self.user is not None:
            message['launch_nested_container_session']['command']['user'] = \
                self.user

        if self.tty:
            message[
                'launch_nested_container_session'][
                    'container'] = {
                        'type': 'MESOS',
                        'tty_info': {}}

        req_extra_args = {
            'stream': True,
            'headers': {
                'Content-Type': 'application/json',
                'Accept': 'application/recordio',
                'Message-Accept': 'application/json'}}

        response = http.post(
            self.agent_url,
            data=json.dumps(message),
            timeout=None,
            **req_extra_args)

        self._process_output_stream(response)

    def _process_output_stream(self, response):
        """Gets data streamed over the given response and places the
        returned messages into our output_queue. Only expects to
        receive data messages.

        :param response: Response from an http post
        :type response: requests.models.Response
        """

        # Now that we are ready to process the output stream (meaning
        # our output connection has been established), allow the input
        # stream to be attached by setting an event.
        self.attach_input_event.set()

        # If we are running in interactive mode, wait to make sure that
        # our input connection succeeds before pushing any output to the
        # output queue.
        if self.interactive:
            self.print_output_event.wait()

        try:
            for chunk in response.iter_content(chunk_size=None):
                records = self.decoder.decode(chunk)

                for r in records:
                    if r.get('type') and r['type'] == 'DATA':
                        self.output_queue.put(r['data'])
        except Exception as e:
            raise MesosException(
                "Error parsing output stream: {error}".format(error=e))

        self.output_queue.join()
        self.exit_event.set()

    def _attach_container_input(self):
        """Streams all input data (e.g. STDIN) from the client to the agent
        """

        def _initial_input_streamer():
            """Generator function yielding the initial ATTACH_CONTAINER_INPUT
            message for streaming. We have a separate generator for this so
            that we can attempt the connection once before committing to a
            persistent connection where we stream the rest of the input.

            :returns: A RecordIO encoded message
            """

            message = {
                'type': 'ATTACH_CONTAINER_INPUT',
                'attach_container_input': {
                    'type': 'CONTAINER_ID',
                    'container_id': {
                        'parent': { 'value': self.parent_id },
                        'value': self.container_id}}}
            if self.parent_container_id is not None:
                message['attach_container_input']['container_id']['parent']['parent'] = { 'value': self.parent_container_id }

            yield self.encoder.encode(message)

        def _input_streamer():
            """Generator function yielding ATTACH_CONTAINER_INPUT
            messages for streaming. It yields the _initial_input_streamer()
            message, followed by messages from the input_queue on each
            subsequent call.

            :returns: A RecordIO encoded message
            """

            yield next(_initial_input_streamer())

            while True:
                record = self.input_queue.get()
                if not record:
                    break
                yield record

        req_extra_args = {
            'headers': {
                'Content-Type': 'application/recordio',
                'Message-Content-Type': 'application/json',
                'Accept': 'application/json',
                'Connection': 'close',
                'Transfer-Encoding': 'chunked'
            }
        }

        # Ensure we don't try to attach our input to a container that isn't
        # fully up and running by waiting until the
        # `_process_output_stream` function signals us that it's ready.
        self.attach_input_event.wait()

        # Send an intial "Test" message to ensure that we are able to
        # establish a connection with the agent. If we aren't we will throw
        # an exception and break out of this thread. However, in cases where
        # we receive a 500 response from the agent, we actually want to
        # continue without throwing an exception. A 500 error indicates that
        # we can't connect to the container because it has already finished
        # running. In that case we continue running to allow the output queue
        # to be flushed.
        try:
            http.post(
                self.agent_url,
                data=_initial_input_streamer(),
                **req_extra_args)
        except MesosHTTPException as e:
            if e.response.status_code != 500:
                raise e

        # If that connection succeeded, unblock _process_output_stream() so it
        # can start pushing output data to the output thread.
        self.print_output_event.set()

        # Begin streaming the input.
        http.post(
            self.agent_url,
            data=_input_streamer(),
            timeout=None,
            **req_extra_args)

    def _input_thread(self):
        """Reads from STDIN and places a message
        with that data onto the input_queue.
        """

        message = {
            'type': 'ATTACH_CONTAINER_INPUT',
            'attach_container_input': {
                'type': 'PROCESS_IO',
                'process_io': {
                    'type': 'DATA',
                    'data': {
                        'type': 'STDIN',
                        'data': ''}}}}

        for chunk in iter(partial(os.read, sys.stdin.fileno(), 1024), b''):
            message[
                'attach_container_input'][
                    'process_io'][
                        'data'][
                            'data'] = base64.b64encode(chunk).decode('utf-8')

            self.input_queue.put(self.encoder.encode(message))

        # Push an empty string to indicate EOF to the server and push
        # 'None' to signal that we are done processing input.
        message['attach_container_input']['process_io']['data']['data'] = ''
        self.input_queue.put(self.encoder.encode(message))
        self.input_queue.put(None)

    def _output_thread(self):
        """Reads from the output_queue and writes the data
        to the appropriate STDOUT or STDERR.
        """

        while True:
            # Get a message from the output queue and decode it.
            # Then write the data to the appropriate stdout or stderr.
            output = self.output_queue.get()
            if not output.get('data'):
                raise MesosException("Error no 'data' field in output message")

            data = output['data']
            data = base64.b64decode(data.encode('utf-8'))

            if output.get('type') and output['type'] == 'STDOUT':
                sys.stdout.buffer.write(data)
                sys.stdout.flush()
            elif output.get('type') and output['type'] == 'STDERR':
                sys.stderr.buffer.write(data)
                sys.stderr.flush()
            else:
                raise MesosException("Unsupported data type in output stream")

            self.output_queue.task_done()

    def _heartbeat_thread(self):
        """Generates a heartbeat message to send over the
        ATTACH_CONTAINER_INPUT stream every `interval` seconds and
        inserts it in the input queue.
        """

        interval = self.HEARTBEAT_INTERVAL
        nanoseconds = self.HEARTBEAT_INTERVAL_NANOSECONDS

        message = {
            'type': 'ATTACH_CONTAINER_INPUT',
            'attach_container_input': {
                'type': 'PROCESS_IO',
                'process_io': {
                    'type': 'CONTROL',
                    'control': {
                        'type': 'HEARTBEAT',
                        'heartbeat': {
                              'interval': {
                                   'nanoseconds': nanoseconds}}}}}}

        while True:
            self.input_queue.put(self.encoder.encode(message))
            time.sleep(interval)

    def _window_resize(self, signum, frame):
        """Signal handler for SIGWINCH.

        Generates a message with the current dimensions of the
        terminal and puts it in the input_queue.

        :param signum: the signal number being handled
        :type signum: int
        :param frame: current stack frame
        :type frame: frame
        """

        # Determine the size of our terminal, and create the message to be sent
        rows, columns = os.popen('stty size', 'r').read().split()

        message = {
            'type': 'ATTACH_CONTAINER_INPUT',
            'attach_container_input': {
                'type': 'PROCESS_IO',
                'process_io': {
                    'type': 'CONTROL',
                    'control': {
                        'type': 'TTY_INFO',
                        'tty_info': {
                              'window_size': {
                                  'rows': int(rows),
                                  'columns': int(columns)}}}}}}

        self.input_queue.put(self.encoder.encode(message))
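
# Hedged sketch of how these pieces are typically wired together. The class
# name and constructor below are illustrative assumptions (the real class is
# defined earlier in this example and is not reproduced here):
#
#     io = ContainerIO(...)                          # hypothetical constructor
#     for target in (io._input_thread, io._output_thread,
#                    io._heartbeat_thread, io._attach_container_input):
#         t = threading.Thread(target=target, daemon=True)
#         t.start()
#     io._process_output_stream(response)            # blocks until exit_event is set
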
Example #33
0
q = Queue()
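
# queue_putter is not shown in this excerpt; a minimal sketch, assuming the item
# fields (chat, chattype, text, delete) that the consuming loop below reads:
def queue_putter():
    from collections import namedtuple
    Item = namedtuple('Item', 'chat chattype text delete')  # hypothetical item type
    # enqueue one illustrative message, then signal shutdown with None
    q.put(Item(chat='123456789', chattype='channel', text='hello', delete=False))
    q.put(None)
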
t = Thread(target=queue_putter)
t.daemon = True
t.start()

tg = TelegramClient(StringSession(config.session), config.api_id,
                    config.api_hash)
tg.start()

while True:
    item = q.get()
    if item is None:
        break
    try:
        if item.chattype == 'channel':
            entity = tg.get_entity(PeerChannel(int(item.chat)))
        elif item.chattype == 'user':
            entity = tg.get_entity(PeerUser(int(item.chat)))
        elif item.chattype == 'chat':
            entity = tg.get_entity(PeerChat(int(item.chat)))
        else:
            entity = item.chat
        msg = tg.send_message(entity, item.text)
        if item.delete:
            msg.delete()
    except ValueError as e:
        print(e.args)

    q.task_done()
Example #34
0
class TxThread(Thread):
    def __init__(self, socket_timeout_ms):
        super(TxThread, self).__init__()
        self.socket_timeout_ms = socket_timeout_ms
        self._queue = Queue(1)
        self.peers = {}
        self.peer_payload = None
        self.peer_message = None
        self.error = False
        self.peers_lock = Lock()

    def add_peer(self, name, host, port):
        LOGGER.debug("Adding peer %s (%s:%d)...", name, host, port)
        conn = WorkerConn(name, host, port)
        with self.peers_lock:
            self.peers[name] = conn
        LOGGER.debug("peer %s added.", name)

    def remove_peer(self, name):
        LOGGER.debug("Removing peer %s...", name)
        with self.peers_lock:
            peer = self.peers[name]
            if peer.connected:
                peer.sock.close()
            del self.peers[name]
        LOGGER.debug("peer %s removed.", name)

    def _get_random_peer(self):
        # The score is a weighted sum (with equal weights)
        with self.peers_lock:
            scores = {k: v.flow_control_score + \
                         random.randint(FLOW_CONTROL_MIN_SCORE, FLOW_CONTROL_MAX_SCORE) \
                      for k, v in self.peers.items()}

        if len(scores) == 0:
            LOGGER.debug("No peers were added.")
            return None

        max_score = max(scores.values())

        # There may be multiple peers with the same max score (rare)
        max_scores_keys = [k for k, v in scores.items() if v == max_score]
        key = max_scores_keys[random.randint(0, len(max_scores_keys)-1)]
        peer = self.peers[key]

        LOGGER.debug("peer %s selected score=%d, flow_control_score=%d",
                     peer.name, max_score, peer.flow_control_score)

        # Make sure the client is connected
        try:
            if not peer.connected:
                peer.sock = _create_tcp_socket()
                peer.sock.settimeout(self.socket_timeout_ms/1000)
                peer.sock.connect((peer.host, peer.port))
                peer.connected = True
                LOGGER.debug("connected to peer %s successfully", peer.name)
        except ConnectionRefusedError:
            LOGGER.debug("peer %s not listening yet", peer.name)
            self._flow_control_dec(peer)
            return None
        except:
            LOGGER.exception("Couldn't connect to peer %s (unrecoverable)", peer.name)
            self.remove_peer(peer.name)
            return None

        return peer

    def _flow_control_inc(self, peer):
        """Increase the flow control score of peer."""
        peer.flow_control_score = min(peer.flow_control_score + FLOW_CONTROL_INC_SCORE,
                                      FLOW_CONTROL_MAX_SCORE)

    def _flow_control_dec(self, peer):
        """Decrease the flow control score of peer."""
        peer.flow_control_score = max(peer.flow_control_score - FLOW_CONTROL_DEC_SCORE,
                                      FLOW_CONTROL_MIN_SCORE)

    def run(self):
        LOGGER.info("TxThread: run()")
        while True:
            witem = self._queue.get(block=True)
            LOGGER.debug("TxThread: have work...")
            if not witem:
                LOGGER.info("Exiting TxThread...")
                break

            # Wait until we successfully fetch from a peer,
            # or until we don't have any peers to fetch from
            done = False
            while not done:
                peer = self._get_random_peer()
                if peer is None:
                    self.peer_payload = None
                    self.peer_message = None
                    done = True
                    continue

                try:
                    # Send a fetch parameters request
                    LOGGER.debug("TxThread: Sending message fd=%d", peer.sock.fileno())
                    send_message(peer.sock, MESSAGE_TYPE_FETCH_PARAMETERS)
                    message_type, self.peer_message, self.peer_payload = recv_message(peer.sock)
                    assert message_type == MESSAGE_TYPE_FETCH_PARAMETERS

                    self._flow_control_inc(peer)
                    done = self.peer_payload is not None

                except socket.timeout:
                    LOGGER.warning("TxThread: peer %s timeout, restarting connection...", peer.name)
                    self._flow_control_dec(peer)
                    peer.sock.close()
                    peer.sock = None
                    peer.connected = False

                except:
                    LOGGER.exception("Error connecting with peer %s.", peer.name)
                    self.remove_peer(peer.name)

            self._queue.task_done()

        LOGGER.info("TxThread: exiting...")

    def fetch_send(self):
        """Initiate an async fetch_parameters request.

        Selects a random peer and fetch its latest parameters.
        """
        self._queue.put(True)

    def fetch_wait(self):
        """Waits for the fetch_parameters request to complete."""
        self._queue.join()
        return self.peer_message, self.peer_payload

    def shutdown(self):
        self._queue.put(False)
        self._queue.join()
        self.join()
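
# Minimal usage sketch (assumes the WorkerConn / send_message / recv_message /
# FLOW_CONTROL_* helpers used by TxThread are importable; host and port are
# illustrative placeholders):
#
#     tx = TxThread(socket_timeout_ms=5000)
#     tx.add_peer('worker-1', '127.0.0.1', 9000)
#     tx.start()
#     tx.fetch_send()                      # enqueue an async fetch_parameters request
#     message, payload = tx.fetch_wait()   # block until the fetch completes
#     tx.shutdown()                        # unblocks run() and joins the thread
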
Example #35
0
class DownloadManager:
    def __init__(self, urls, output_directory, nb_workers, **options):

        self.output_directory = output_directory
        self.nb_workers = min(nb_workers, len(urls))
        self.options = options

        self._urls = Queue()
        self._state = DownloadState.not_started
        self._download_handlers = []

        # initialize the queue
        for i, url in enumerate(urls):
            self._urls.put((i, url))
            self._download_handlers.append(DownloadHandler(url))

    def log(self, *args, **kwargs):
        if self.options.get('quiet'):
            return
        print(*args, **kwargs)

    @staticmethod
    def get_downloader(url):

        parsed_url = urlparse(url)
        for downloader in SCHEMES.get(parsed_url.scheme, []):
            if downloader.can_handle_url(url):
                return downloader

        raise NotImplementedError('No downloader for {} urls'.format(
            parsed_url.scheme))

    @property
    def state(self):
        return self._state

    @state.setter
    def state(self, value):
        current_state = self._state
        if value not in STATE_TRANSITIONS[current_state]:
            raise TransitionError(current_state, value)
        self._state = value

    def process(self):
        self.state = DownloadState.started

        try:
            watcher = Thread(target=self.watcher)
            watcher.start()

            for _ in range(self.nb_workers):
                t = Thread(target=self.worker)
                t.start()

            self._urls.join()
            watcher.join()
        except KeyboardInterrupt:
            self.cancel()

    def worker(self):
        while True:
            state = self.state
            if state not in [DownloadState.paused, DownloadState.started]:
                break

            if state == DownloadState.paused:
                time.sleep(0.1)
                continue

            try:
                index, url = self._urls.get_nowait()
            except Empty:
                break

            downloader = self.process_single_url(url)
            if downloader:
                self._download_handlers[index].downloader = downloader
                downloader.start()
            self._urls.task_done()

    def process_single_url(self, url):
        try:
            downloader = self.get_downloader(url)
        except NotImplementedError as e:
            self.log('{}: skipping {}'.format(e, url))
            return None

        output = os.path.join(self.output_directory)
        download_process = downloader(url, output)
        return download_process

    def watcher(self):

        while True:

            for download_handler in self._download_handlers:
                download_handler.update_progress()

            if not self._urls.unfinished_tasks:
                break
            time.sleep(1)

        self.state = DownloadState.finished
        tqdm.write('')

    def pause(self):
        self.state = DownloadState.pausing
        for download_handler in self._download_handlers:
            download_handler.pause()
        self.state = DownloadState.paused

    def resume(self):
        self.state = DownloadState.resuming
        for download_handler in self._download_handlers:
            download_handler.resume()
        self.state = DownloadState.started

    def cancel(self):
        self.state = DownloadState.canceling
        for download_handler in self._download_handlers:
            download_handler.cancel()
        self.state = DownloadState.canceled
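
# Minimal usage sketch (assumes the DownloadState / SCHEMES / DownloadHandler
# plumbing used above is importable; the URLs and output directory are
# illustrative placeholders):
if __name__ == '__main__':
    manager = DownloadManager(
        urls=['https://example.com/a.bin', 'https://example.com/b.bin'],
        output_directory='downloads',
        nb_workers=2)
    manager.process()   # blocks until the queue is drained or KeyboardInterrupt
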
Example #36
0
        stream.change_config()
        stream.process_samples = handle_samples
        stream.process_config = handle_config
        stream.start()
        stream.loop_callbacks()

    streaming_thread = Thread(target=run, name='sample-stream', args=(sample_queue,), daemon=True)
    return streaming_thread


if __name__ == '__main__':
    args = parseArgs()

    sample_queue = Queue()
    streaming_thread = configure_live_stream(sample_queue)
    streaming_thread.start()

    buffers_to_accumulate = np.ceil(args['duration'] / (EASY_DSP_AUDIO_BUFFER_LENGTH_MS / 1000))
    past_buffers = []
    while len(past_buffers) != buffers_to_accumulate:
        samples = sample_queue.get()
        past_buffers += [samples]
        sample_queue.task_done()

    recording = np.concatenate(past_buffers, axis=0)
    wavfile.write(args['outputFile'], EASY_DSP_AUDIO_FREQ_HZ, recording)

    # Needed because Thread('sample-stream') is on an infinite loop. (This is by construction of streaming.py.)
    sys.exit()
Example #37
0
class Schedular:
    def __init__(self, threadnum=1):
        self.queue = Queue()
        self.ip_queue = Queue()
        self.threadNum = threadnum
        self.lock = threading.Lock()
        self.cache_ips = []  # IP buffer pool
        self.cache_domains = []  # domain buffer pool

    def put_target(self, target):
        # Decide whether the target is an IP or a domain and tag it accordingly
        if is_ip_address_format(target):
            serviceType = "ip"
        elif is_url_format(target):
            serviceType = "domain"
            target = target.rstrip('/')
        else:
            serviceType = "other"

        tmp = {"target": target, "serviceType": serviceType}
        if serviceType == "ip":
            self.ip_queue.put(tmp)
        else:
            self.queue.put(tmp)
        task_update("tasks", self.queue.qsize() + self.ip_queue.qsize())
        # notify the redis component so it can refresh its task info

    def receive_ip(self):

        while 1:
            struct = self.ip_queue.get()
            serviceType = struct.get("serviceType", "other")
            task_update("tasks", self.queue.qsize() + self.ip_queue.qsize())
            if serviceType == "ip":
                flag = False
                self.lock.acquire()
                self.cache_ips.append(struct)
                num = len(self.cache_ips)
                if num >= NUM_CACHE_IP:
                    flag = True
                    serviceTypes = self.cache_ips  # grab the buffered batch
                    self.cache_ips = []
                self.lock.release()
                if not flag:
                    self.ip_queue.task_done()
                    continue
                task_update("running", 1)
                try:
                    self.hand_ip(serviceTypes)
                except Exception as e:
                    logger.error("hand ip error:{}".format(repr(e)))
                    logger.error(repr(sys.exc_info()))
                task_update("running", -1)
            self.ip_queue.task_done()
            task_update("tasks", self.queue.qsize() + self.ip_queue.qsize())

    def receive(self):

        while 1:

            try:
                struct = self.queue.get(timeout=1)
            except Exception as e:
                continue

            task_update("tasks", self.queue.qsize() + self.ip_queue.qsize())

            serviceType = struct.get("serviceType", "other")
            if serviceType == "other":
                msg = "not matches targets:{}".format(repr(struct))
                logger.error(msg)
                self.queue.task_done()
                continue

            elif serviceType == "domain":
                flag = False
                self.lock.acquire()
                self.cache_domains.append(struct)
                num = len(self.cache_domains)
                if num >= NUM_CACHE_DOMAIN:
                    flag = True
                    serviceTypes = self.cache_domains
                    # reset the buffer list
                    self.cache_domains = []
                self.lock.release()
                if not flag:
                    self.queue.task_done()
                    continue
                # scan each buffered domain
                for serviceType in serviceTypes:
                    task_update("running", 1)
                    try:
                        self.hand_domain(serviceType)
                    except Exception as e:
                        logger.error("hand domain error :{}".format(repr(e)))
                        logger.error(repr(sys.exc_info()))
                    task_update("running", -1)
            self.queue.task_done()
            task_update("tasks", self.queue.qsize() + self.ip_queue.qsize())

    def start(self):

        for i in range(self.threadNum - 1):
            print(i)
            _thread.start_new_thread(self.receive, ())
        _thread.start_new_thread(self.receive_ip, ())

    def nmap_result_handle(self, result_nmap: dict, host):
        # normalize the data returned by the nmap plugin
        if result_nmap is None:
            return None
        result2 = {}
        for port, portInfo in result_nmap.items():
            if host not in result2:
                result2[host] = []
            if portInfo["state"] != "open":
                continue
            name = portInfo.get("name", "")
            # hand nmap bug
            product = portInfo.get("product", "")
            version = portInfo.get("version", "")
            extrainfo = portInfo.get("extrainfo", "")

            if "http" in name and "https" not in name:
                if port == 443:
                    _url = "https://{0}:{1}".format(host, port)
                else:
                    _url = "http://{0}:{1}".format(host, port)
                self.put_target(_url)
            elif "https" in name:
                _url = "https://{0}:{1}".format(host, port)
                self.put_target(_url)
            result2[host].append({
                "port": port,
                "name": name,
                "product": product,
                "version": version,
                "extrainfo": extrainfo
            })
        return result2

    def hand_ip(self, serviceTypes, option='masscan'):
        ip_list = []

        for item in serviceTypes:
            ip_list.append(item["target"])
        ports = MASSCAN_DEFAULT_PORT
        result2 = {}
        if option == 'masscan':
            if MASSCAN_FULL_SCAN:
                ports = "1-65535"
            target = os.path.join(PATHS.OUTPUT_PATH,
                                  "target_{0}.log".format(time.time()))
            with open(target, "w+") as fp:
                fp.write('\n'.join(ip_list))
            logger.debug("ip:" + repr(ip_list))
            try:
                result = masscan(target, ports)
            except Exception as e:
                logger.error("masscan error msg:{}".format(repr(e)))
                result = None
            if result is None:
                return None
            for host, ports in result.items():
                ports = list(ports)
                if host not in result2:
                    result2[host] = []
                task_update("running", 1)
                try:
                    result_nmap = nmapscan(host, ports)
                except:
                    result_nmap = None
                task_update("runnning", -1)
                if result_nmap is None:
                    for tmp_port in ports:
                        result2[host].append({"port": tmp_port})
                    continue
                tmp_r = self.nmap_result_handle(result_nmap, host=host)
                result2.update(tmp_r)
        elif option == "nmap":
            logger.debug("ip:" + repr(ip_list))
            for host in ip_list:
                result_nmap = nmapscan(host, ports.split(","))
                tmp_r = self.nmap_result_handle(result_nmap, host=host)
                if tmp_r:
                    result2.update(tmp_r)
        data = {}

        # collect every IP and look up its geolocation info
        for ip in result2.keys():
            if ip not in data:
                data[ip] = {}
            d = ip_location.poc(ip)
            if d:
                data[ip]["location"] = d
            data[ip]["infos"] = result2[ip]

        collector.add_ips(data)
        # save all the info, then push the results to the front end
        for ip in result2.keys():
            collector.send_ok_ip(ip)

    def hand_domain(self, serviceType):
        target = serviceType["target"]
        logger.info(target)
        # register this target record
        collector.add_domain(target)
        # send the initial request
        try:
            r = requests.get(target,
                             timeout=30,
                             verify=False,
                             allow_redirects=False)
            collector.add_domain_info(target, {
                "headers": r.headers,
                "body": r.text,
                "status_code": r.status_code
            })
        except Exception as e:
            logger.error("request url error:" + str(e))
            collector.del_domain(target)
            return
        logger.debug("target:{} over,start to scan".format(target))

        # Get the hostname from the target URL
        hostname = urlparse(target).netloc.split(":")[0]
        if not is_ip_address_format(hostname):
            try:
                # return the host from socket
                _ip = socket.gethostbyname(hostname)
                collector.add_domain_info(target, {"ip": _ip})
            except:
                pass
        else:
            collector.add_domain_info(target, {"ip": hostname})
        # decide which poc plugins to run for target information gathering
        work_list = [webeye.poc, webtitle.poc, wappalyzer.poc]
        # password_found.poc

        if IS_START_PLUGINS:
            pass
            work_list.append(crossdomain.poc)
            # work_list.append(directory_browse.poc)
            work_list.append(gitleak.poc)
            work_list.append(iis_parse.poc)
            work_list.append(phpinfo.poc)
            work_list.append(svnleak.poc)
            work_list.append(tomcat_leak.poc)
            # work_list.append(whatcms.poc)

        # each poc stores its findings directly through the collector

        for func in work_list:
            try:
                func(target)
            except Exception as e:
                logger.error("domain plugin threading error {}:{}".format(
                    repr(Exception), str(e)))
                pass
        logger.debug("target:{} End of scan".format(target))
        collector.print_domains()
        infos = collector.get_domain(target)
        _pocs = []
        temp = {}
        if IS_START_PLUGINS and "CMS" in infos:
            if infos.get("app"):
                temp["app"] = []
                temp["app"].append(infos["CMS"])
            else:
                temp["app"] = [infos["CMS"]]
            # update domain app
            collector.add_domain_info(target, temp)

        if temp.get("app"):
            keywords = temp["app"]
            # load poc plugins from the remote server
            pocs = load_remote_poc()

            for poc in pocs:
                for keyword in keywords:
                    webfile = poc["webfile"]
                    logger.debug("load {0} poc:{1} poc_time:{2}".format(
                        poc["type"], webfile, poc["time"]))

                    # load the plugin: fetch the remote file and convert it into a module object

                    code = requests.get(webfile).text
                    obj = load_string_to_moudle(code, webfile)
                    # add the remote module to the plugin list
                    _pocs.append(obj)
        # run the plugins concurrently
        if _pocs:
            executor = futures.ThreadPoolExecutor(len(_pocs))
            fs = []
            for f in _pocs:
                task = executor.submit(f.poc, target)
                # submit() returns a Future that controls this plugin run
                fs.append(task)
            for f in futures.as_completed(fs):
                try:
                    res = f.result()
                except Exception as e:
                    res = None
                    logger.error("load poc error:{} error:{}".format(
                        target, str(e)))
                if res:
                    name = res.get("name") or "scan_" + str(time.time())
                    collector.add_domain_bug(target, {name: res})
        # plugin results come back asynchronously and are sent on via the collector
        collector.send_ok(target)
        print("print collector")
        print(collector.collect_domains)

    def run(self):
        while 1:
            # process any remaining cached domains
            if self.cache_domains:
                self.lock.acquire()
                service_types = self.cache_domains
                self.cache_domains = []
                self.lock.release()

                for serviceType in service_types:
                    task_update("running", 1)
                    try:
                        self.hand_domain(serviceType)
                    except Exception as e:
                        logger.error(repr(sys.exc_info()))
                        pass
                    task_update("running", -1)

            # process any remaining cached IPs
            if self.cache_ips:
                self.lock.acquire()
                service_types = self.cache_ips
                self.cache_ips = []
                self.lock.release()

                task_update("running", 1)

                try:
                    self.hand_ip(service_types)
                except Exception as e:
                    logger.error(repr(sys.exc_info()))
                    pass
                task_update("runnning", -1)

            # final submit
            collector.submit()
            task_update("tasks", self.queue.qsize() + self.ip_queue.qsize())
            time.sleep(random.randint(2, 10))
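
# Hedged usage sketch (not part of the original example): how the scheduler is
# typically driven; the target values below are illustrative assumptions.
#
#     scheduler = Schedular(threadnum=4)
#     scheduler.start()                        # spawn receive()/receive_ip() workers
#     scheduler.put_target("http://www.example.com")
#     scheduler.put_target("192.0.2.10")
#     scheduler.run()                          # periodically flush cached domains/ips
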
class ScanBackUP(object):

    def __init__(self) -> None:
        # super().__init__()
        # all incoming data lands here first
        self._input = None
        # a copy of every file goes here for ES; this directory is not managed by this program
        self._esinput = None
        # data to be backed up goes here; everything to be processed lives in this directory
        self._dbu_input = None
        self._databack = None
        self._zipdata: Path = None
        self._zip_size = None
        # one backup thread by default; can be changed in the config (requires a restart)
        self.backup_thread = 1
        self.zip_thread = 1
        # switch controlling whether files are also copied to the ES input directory
        self.copy_esinput_enable = True
        self._tmp = Path('./tmp')
        self._tmp.mkdir(exist_ok=True)
        # whether a copy should also go to the old index input directory
        self._old_esinput = None
        self.config_path = Path(r'./config_path.json')
        try:
            self._init_cpinfo()
        except:
            raise Exception(
                f"Failed to initialize config parameters, please check the config file\nerror:{traceback.format_exc()}")
        # runtime parameters
        # file lock: only one thread may operate on files at a time
        self.__file_locker = threading.Lock()
        self.__scan_file_locker = threading.Lock()
        self._zipfile_locker = threading.Lock()
        # zipping can take a while, so keep a dict of directories currently being zipped
        self._zip_dealing = {}
        # work queues assigned by file suffix; currently only iscan is handled
        self.iscan_task_queue = Queue()
        self._zip_queue = Queue()
        self.iscan_suffix = '.iscan_search'
        # try:
        #     self._restore_existdata()
        # except:
        #     raise Exception(
        #         "There's something wrong with restoring the environment")

    def _init_cpinfo(self):
        """
        Initialize the paths and parameters from the config file
        :return:
        """
        conf_str = self.config_path.read_text(encoding='utf-8')
        conf_dict = json.loads(conf_str)
        _input = conf_dict.get('data_input')
        if not isinstance(_input, str):
            raise Exception("Unknown data_input path")
        self._input = Path(_input)
        self._input.mkdir(exist_ok=True)
        print(
            f"Start scan data file, input_file_path:{self._input.as_posix()}")
        _esinput = conf_dict.get('es_input')
        if not isinstance(_esinput, str):
            raise Exception("Unknown es_input path")
        self._esinput = Path(_esinput)
        self._esinput.mkdir(exist_ok=True)
        print(f"Save data to ES, es_path:{self._esinput.as_posix()}")
        _dbuinput = conf_dict.get('backup_input')
        if not isinstance(_dbuinput, str):
            raise Exception("Unkown backup_input path")
        self._dbu_input = Path(_dbuinput)
        self._dbu_input.mkdir(exist_ok=True)
        print(f"Data backup process path:{self._dbu_input.as_posix()}")
        _databack = conf_dict.get('databackup')
        if not isinstance(_databack, str):
            raise Exception("Unknown databackup path")
        self._databack = Path(_databack)
        self._databack.mkdir(exist_ok=True)
        print(f"Data save backup path:{self._databack.as_posix()}")
        _zipdata = conf_dict.get('zipdata')
        if not isinstance(_zipdata, str):
            raise Exception("Unkown zipdata path")
        self._zipdata = Path(_zipdata)
        self._zipdata.mkdir(exist_ok=True)
        print(f"Zipdata save path:{self._zipdata.as_posix()}")
        _zip_size = conf_dict.get('zip_size')
        if not isinstance(_zip_size, int):
            raise Exception("Unknown zip_size type")
        # convert the size from MB to bytes
        self._zip_size = _zip_size * 1024 * 1024
        print(f"Zip data size:{_zip_size}MB")
        backupthread = conf_dict.get('backup_thread')
        if not isinstance(backupthread, int):
            raise Exception("Unknown backupthread type")
        self.backup_thread = backupthread
        zipthread = conf_dict.get('zipdata_thread')
        if not isinstance(zipthread, int):
            raise Exception("Unknown zipthread type")
        self.zip_thread = zipthread
        time_limit = conf_dict.get('time_limit')
        if not isinstance(time_limit, int):
            raise Exception("Unknown time_limit type")
        self._backup_interval_time = time_limit * 24 * 60 * 60
        print(f"Zip data time expired after {time_limit} days")
        # copying to the ES input directory is enabled by default
        copy_esinput_enable = conf_dict.get('copy_to_esinput', True)
        self.copy_esinput_enable = copy_esinput_enable
        # copy target for the old index data
        _old_esinput = conf_dict.get('old_esinput')
        if not isinstance(_old_esinput, str):
            raise Exception("Unknown old_esinput path")
        self._old_esinput = Path(_old_esinput)
        self._old_esinput.mkdir(exist_ok=True)
        print(f"Copy data to old index, old_es_path:{self._old_esinput.as_posix()}")

    def scan_file(self):
        """
        Scan the input directory.
        Classify files by suffix and put them onto the pending queues.
        :return:
        """
        while True:
            try:
                for file in self._input.iterdir():
                    name = file.name
                    # move everything into the tmp directory first
                    tmpname = self._tmp / name
                    # file.replace(tmpname)
                    with self.__scan_file_locker:
                        # move the file into tmp right away, otherwise the next scan pass would pick it up again
                        shutil.move(file.as_posix(), tmpname.as_posix())
                    try:
                        if tmpname.suffix == self.iscan_suffix:
                            # only a copy is made here
                            # source: Path = self._input / name
                            target: Path = self._dbu_input / name
                            copyfile(tmpname.as_posix(), target.as_posix())
                            self.iscan_task_queue.put(target)
                            print(
                                f"Backup iscan_search data, filename:{file.as_posix()}")
                    except:
                        print(
                            f'Scan list file error, err:{traceback.format_exc()}')
                    finally:
                        # in any case the file must end up in the esinput directory
                        if self.copy_esinput_enable:
                            outname = self._esinput / name
                            tmpname.replace(outname)
                        # normally no file is left behind, but just in case, delete it if it still exists
                        if tmpname.exists():
                            tmpname.unlink()
            except:
                print(f'Scan task file error, err:{traceback.format_exc()}')
                continue
            finally:
                print("There is no scan data to back up")
                time.sleep(0.5)

    def _process_file(self, tmpfile: Path):
        """
        Open the file briefly, read the data we need, then close it.
        """
        with tmpfile.open('r', encoding='utf-8') as fp:
            j_text = fp.read()
            d_text = json.loads(j_text)
            # scan_time = d_text.get('time')
            # if scan_time is None:
            # scan_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            try:
                country = d_text.get('geoinfo').get('country').get('code')
            except:
                country = 'UNKNOWN'
        return country

    def back_file(self):
        """
        Start backing up data: first save the files into a directory,
        then compress that directory once it reaches a certain size.
        :return:
        """
        got = False
        while True:
            got = False
            if self.iscan_task_queue.empty():
                time.sleep(0.5)
                continue
            try:
                bfile: Path = self.iscan_task_queue.get()
                got = True
                name = bfile.name
                # read the country (and date) directly from the file
                country = self._process_file(bfile)
                # before each save, check whether the directory needs renaming and zipping
                date_now_str = datetime.datetime.now().strftime("%Y-%m-%d")
                # lock only while creating the directory; plain moves do not need it
                with self.__file_locker:
                    # build the destination directory first
                    dirname: Path = self._databack / country / date_now_str
                    dirname.mkdir(exist_ok=True, parents=True)
                # destination file name
                filename = dirname / name
                # move into the target directory
                bfile.replace(filename)
                print(
                    f"Backup file, country:{country}, filename:{name}, date:{date_now_str}")
            except:
                print(f'Backup file error:\n{traceback.format_exc()}')
            finally:
                if got:
                    self.iscan_task_queue.task_done()

    def scan_zip_file(self):
        """
        Zip-file thread; periodically scans the backup tree
        and compresses directories from previous days into the zip directory.
        """
        while True:
            try:
                date_now = datetime.datetime.now().date()
                for country in self._databack.iterdir():
                    if not country.exists():
                        continue
                    country_name = country.name
                    for d_file in country.iterdir():
                        if self._zip_dealing.__contains__(d_file):
                            continue
                        d_name = d_file.name
                        d_date = datetime.datetime.strptime(
                            d_name, "%Y-%m-%d").date()
                        # compress any data dated before today
                        if date_now > d_date:
                            self._zip_queue.put((d_file, country_name))
                            with self._zipfile_locker:
                                # mark as currently being processed
                                self._zip_dealing[d_file] = 1
                            print(
                                f"A file wait to zip, filename:{d_file.as_posix()}")
            except:
                print(f"Zip file error:\n{traceback.format_exc()}")
            finally:
                print("There is no scan data to zip")
                time.sleep(3600)

    def process_zip_file(self):
        """
        Compress directories dated before today.
        """
        got = False
        zipfile_path = None
        while True:
            got = False
            if self._zip_queue.empty():
                time.sleep(1)
                continue
            try:
                zipfile_path, country = self._zip_queue.get()
                got = True
                zip_store_file = self._zipdata / country
                zip_store_file.mkdir(exist_ok=True)
                zipname = zip_store_file/f"{zipfile_path.name}.zip"
                print(
                    f"Start zipfile, filename:{zipname.as_posix()}")
                # write into the archive (append mode, deflate compression)
                with zipfile.ZipFile(zipname.as_posix(), 'a', zipfile.ZIP_DEFLATED) as write:
                    for file in zipfile_path.iterdir():
                        write.write(file.as_posix())
                        # delete each file after it has been written
                        file.unlink()
                    write.close()
                # finally remove the now-empty source directory
                zipfile_path.rmdir()
                print(
                    f"Store zipfile success, filename:{zipname.as_posix()}")
            except:
                print(f"Zip file error:\n{traceback.format_exc()}")
            finally:
                if got:
                    self._zip_queue.task_done()
                    with self._zipfile_locker:
                        self._zip_dealing.pop(zipfile_path, None)

    def start(self):
        """
        Start all worker threads.
        :return:
        """
        thread1 = threading.Thread(target=self.scan_file, name="scanfile")
        thread1.start()
        for i in range(self.backup_thread):
            t = threading.Thread(target=self.back_file, name=f"backfile{i}")
            t.start()
        thread2 = threading.Thread(
            target=self.scan_zip_file, name=f"scan_zipfile")
        thread2.start()
        for j in range(self.zip_thread):
            tz = threading.Thread(
                target=self.process_zip_file, name=f"zipfile{j}")
            tz.start()
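
# Minimal usage sketch (assumes a config_path.json with the keys read in
# _init_cpinfo sits next to the script):
if __name__ == '__main__':
    backup = ScanBackUP()
    backup.start()   # spawns the scan / backup / zip worker threads
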
Example #39
0
class SubscribeListener(SubscribeCallback):
    def __init__(self):
        self.connected = False
        self.connected_event = Event()
        self.disconnected_event = Event()
        self.presence_queue = Queue()
        self.message_queue = Queue()
        self.channel_queue = Queue()
        self.uuid_queue = Queue()
        self.membership_queue = Queue()

    def status(self, pubnub, status):
        if utils.is_subscribed_event(
                status) and not self.connected_event.is_set():
            self.connected_event.set()
        elif utils.is_unsubscribed_event(
                status) and not self.disconnected_event.is_set():
            self.disconnected_event.set()

    def message(self, pubnub, message):
        self.message_queue.put(message)

    def presence(self, pubnub, presence):
        self.presence_queue.put(presence)

    def wait_for_connect(self):
        if not self.connected_event.is_set():
            self.connected_event.wait()
        else:
            raise Exception("the instance is already connected")

    def channel(self, pubnub, channel):
        self.channel_queue.put(channel)

    def uuid(self, pubnub, uuid):
        self.uuid_queue.put(uuid)

    def membership(self, pubnub, membership):
        self.membership_queue.put(membership)

    def wait_for_disconnect(self):
        if not self.disconnected_event.is_set():
            self.disconnected_event.wait()
        else:
            raise Exception("the instance is already disconnected")

    def wait_for_message_on(self, *channel_names):
        channel_names = list(channel_names)
        while True:
            env = self.message_queue.get()
            self.message_queue.task_done()
            if env.channel in channel_names:
                return env
            else:
                continue

    def wait_for_presence_on(self, *channel_names):
        channel_names = list(channel_names)
        while True:
            env = self.presence_queue.get()
            self.presence_queue.task_done()
            if env.channel in channel_names:
                return env
            else:
                continue
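
# Hedged usage sketch: wiring the listener into a client follows the usual
# PubNub Python SDK pattern; `pubnub` is assumed to be an already-configured
# PubNub instance and the channel name is an illustrative placeholder.
listener = SubscribeListener()
pubnub.add_listener(listener)
pubnub.subscribe().channels('my_channel').execute()
listener.wait_for_connect()
envelope = listener.wait_for_message_on('my_channel')
print(envelope.message)
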
Example #40
0
class MainLoop(object):
    """A main loop that can optionally be integrated into an existing loop."""
    def __init__(self, map, poke=ignore):
        self.map = map
        self.queue = Queue()
        self.poke = poke
        self.running = False
        self.selectthread = None

    def start(self):
        """Starts async polling"""
        if self.running:
            print "already running"
            return
        self.running = True
        self.poll_once()

    def stop(self):
        """Stops async polling"""
        self.running = False
        # XXX: we could cancel the current thread too...

    def process(self):
        try:
            msg = self.queue.get(True, 1)
            self.selectthread = None
            if self.running:
                self.handle_poll_results(msg)
                self.poll_once()
            self.queue.task_done()
        except Empty:
            pass

    def block(self):
        while self.running:
            self.process()

    def poll_once(self):
        print "polling"
        if self.selectthread != None:
            raise ValueError("Already running!")

        r = []
        w = []
        e = []
        for fd, obj in self.map.items():
            is_r = obj.readable()
            is_w = obj.writable()
            if is_r:
                r.append(fd)
            # accepting sockets should not be writable
            if is_w and not obj.accepting:
                w.append(fd)
            if is_r or is_w:
                e.append(fd)
        if [] == r == w == e:
            print "UNHANDLED CONDITION DEAL WITH THIS"

        self.selectthread = threading.Thread(
            target=thread_poll, args=[r, w, e, self.queue, self.poke])
        self.selectthread.daemon = True
        self.selectthread.start()

    def handle_poll_results(self, msg):
        if "error" in msg:
            print "I don't know how we handle errors: %s" % (msg["error"], )
            return

        r, w, e = msg["fds"]

        map = self.map

        for fd in r:
            obj = map.get(fd)
            if obj is None:
                continue
            asyncore.read(obj)

        for fd in w:
            obj = map.get(fd)
            if obj is None:
                continue
            asyncore.write(obj)

        for fd in e:
            obj = map.get(fd)
            if obj is None:
                continue
            asyncore._exception(obj)
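
# Hedged usage sketch (thread_poll and ignore are defined elsewhere in this
# example's module; asyncore.socket_map is the usual dispatcher map):
#
#     loop = MainLoop(asyncore.socket_map)
#     loop.start()    # kicks off the first background select() thread
#     loop.block()    # keeps handling poll results until stop() is called
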
Example #41
0
class QiubaiSpdier(object):
    def __init__(self):
        self.url_temp = "https://www.qiushibaike.com/8hr/page/{}/"

        self.headers = {
            "User-Agent":
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36"
        }
        self.url_queue = Queue()
        self.html_queue = Queue()
        self.content_queue = Queue()

    def get_url_list(self):
        for i in range(1, 14):
            self.url_queue.put(self.url_temp.format(i))

    def parse_url(self):
        """获取html页面"""
        while True:
            url = self.url_queue.get()
            response = requests.get(url, headers=self.headers)
            self.html_queue.put(response.content.decode())
            self.url_queue.task_done()  # decrement the unfinished count by 1

    def get_content_list(self):
        """解析html页面"""
        while True:
            html_str = self.html_queue.get()
            html = etree.HTML(html_str)
            div_list = html.xpath("//div[@id='content-left']/div")  # group by post
            content_list = list()
            for div in div_list:
                item = {}
                item["content"] = div.xpath(
                    ".//div[@class='content']/span/text()")
                item["content"] = [
                    i.replace("\n", "") for i in item["content"]
                ]
                item["author_gender"] = div.xpath(
                    ".//div[contains(@class,'articleGender')]/@class")
                item["author_gender"] = item["author_gender"][0].split(
                    " ")[-1].replace(
                        "Icon", "") if len(item["author_gender"]) > 0 else None
                item["auhtor_age"] = div.xpath(
                    ".//div[contains(@class,'articleGender')]/text()")
                item["auhtor_age"] = item["auhtor_age"][0] if len(
                    item["auhtor_age"]) > 0 else None
                item["content_img"] = div.xpath(
                    ".//div[@class='thumb']/a/img/@src")
                item["content_img"] = "https:" + item["content_img"][0] if len(
                    item["content_img"]) > 0 else None
                item["author_img"] = div.xpath(
                    ".//div[@class='author clearfix']//img/@src")
                item["author_img"] = "https:" + item["author_img"][0] if len(
                    item["author_img"]) > 0 else None
                item["stats_vote"] = div.xpath(
                    ".//span[@class='stats-vote']/i/text()")
                item["stats_vote"] = item["stats_vote"][0] if len(
                    item["stats_vote"]) > 0 else None
                content_list.append(item)
            self.content_queue.put(content_list)
            self.html_queue.task_done()

    def save_content_list(self):
        """保存"""
        while True:
            content_list = self.content_queue.get()
            for i in content_list:
                print(i)
            self.content_queue.task_done()

    def run(self):  # main logic
        t_list = []
        # 1.url_list
        t_url = threading.Thread(target=self.get_url_list)
        t_list.append(t_url)
        # 2. iterate, send requests, collect responses
        for i in range(10):  # run several request threads
            t_parse = threading.Thread(target=self.parse_url)
            t_list.append(t_parse)
        # 3. extract the data
        for i in range(2):
            t_content = threading.Thread(target=self.get_content_list)
            t_list.append(t_content)
        # 4. save
        t_save = threading.Thread(target=self.save_content_list)
        t_list.append(t_save)
        for t in t_list:
            t.daemon = True  # daemon threads exit as soon as the main thread finishes
            t.start()

        for q in [self.url_queue, self.html_queue, self.content_queue]:
            q.join()  # make the main thread wait until every queue is drained
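
# A minimal entry-point sketch for running the spider (not part of the original
# excerpt):
if __name__ == '__main__':
    qiubai = QiubaiSpdier()
    qiubai.run()
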
Example #42
0
class ScanTools(ScanPlugBase):
    def __init__(self, task: IscanTask):
        ScanPlugBase.__init__(self, task)
        self.task: IscanTask = task
        self.nmap = Nmap()
        self.zgrab2 = Zgrab2()
        self.zmap = Zmap()  # zmap and masscan do the same job; currently trying masscan
        self.logicalgrabber = LogicalGrabber()
        # added IP geolocation lookup, modify by judy 2020/03/31
        self.dbip = DbipMmdb()
        # fetch the hosts from cmd at init time; country codes are supported, so a dedicated method handles this
        self.re_iprang = re.compile(r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\/\d{2}")
        # count of all /24 ranges, used for the progress calculation
        self.__c_ipranges_count = 0
        self.__all_scan_count = 0
        # number of ip ranges already scanned
        self.__has_scan_count = 0
        # current scan progress, e.g. 98.99 %
        self.__progress: float = 0.00
        # port queue consumed by the zmap worker threads
        self.port_queue = Queue()
        # use dicts for easy de-duplication, modify by judy 2020/08/06
        # single IPs and /24 ranges are handled separately for efficiency
        self.hosts = {}
        self.host = {}
        # file lock: identically named files may appear, so guard file handling, modify by judy 20210203
        self._file_locker = threading.RLock()
        # zmap pending queue; producer defaults to the running state
        self.make_zmap_scan_queue_status = True
        # zmap work queue
        self.zmap_queue = Queue()
        self.__zmap_scan_thread_state = {}

        # nmap work queue
        self.nmap_queue = Queue()
        self.__nmap_scan_thread_state = {}
        self._nmap_tmp_dict_locker = threading.RLock()
        self._nmap_tmp = {}

        # zgrab2 work queue
        self.zgrab2_queue = Queue()
        self.__zgrab2_scan_thread_state = {}

        # 2020-09-17: new vulnerability-scan requirement - match vulnerabilities and scan for them
        self.vulns_queue = Queue()
        self.__vulns_scan_thread_state = {}
        self._vulns_list = self.task.cmd.stratagyscan.scan.vuls

        # final result output thread
        self.output_res_queue = Queue()
        # There are two data sources: manually supplied hosts and ip ranges looked up by country.
        # Country lookups return a lot of ranges, so a flag is needed to tell the two apart.
        # Defaults to manual data; when the source is country data the result is always a seres object.
        self.country_flag = False
        # persist the port-scan progress
        self.sp = Path("./scan_port_progress.txt")

    def process_host(self):
        """
        Process the hosts passed in for scanning
        :return:
        """
        log = "开始处理IP段"
        self._logger.debug("Start process ip ranges")
        self._outprglog(log)
        cmdhost: list = self.task.cmd.stratagyscan.scan.hosts
        location: dict = self.task.cmd.stratagyscan.scan.location
        # the explicitly supplied host ranges take precedence
        if cmdhost is not None and len(cmdhost) > 0:
            ip_ranges = cmdhost
        else:
            self.country_flag = True
            # a country-code lookup returns a huge number of ranges
            ip_ranges = self.get_country_iprange(location)
        # modify by judy 2020/08/06: optimized the local IP lookup and the de-dup/overload issues of country-based range queries
        self.split_long_ip_ranges(ip_ranges)
        self._logger.debug("Complete process ip ranges")
        # the ranges have already been stored in self.hosts at this point, so drop the local reference, by judy 20201202
        ip_ranges = None
        return

    def process_port(self):
        """
        Added by judy 2020/03/30
        Put the ports onto a queue for the zmap worker threads to consume,
        and count the total number of targets to scan, modify by judy 2020/04/08
        :return:
        """
        ports = self.task.cmd.stratagyscan.scan.ports
        self._logger.debug(f"Get input {len(ports)} scan port")
        for p in ports:
            self.port_queue.put(p)
        # total number of (port, /24 range) combinations that will be scanned
        self.__all_scan_count = self.port_queue.qsize() * self.__c_ipranges_count
        self._logger.debug(f"There are {self.__all_scan_count} objects will be scan")
        with open("./scan_rate_test_result.txt", "a", encoding="utf-8") as fp:
            fp.write(f"总共有{len(ports)}个端口需要扫描, 总共将有{self.__all_scan_count}个目标需要扫描")
        return

    def split_long_ip_ranges(self, inputipdata):
        """
        Split long ip ranges, e.g. 1.1.0.0/14 -> 1.1.0.0/24, 1.1.1.0/24, ...
        seres.conn = conn
        seres.res = res
        :return:
        """
        self._logger.debug("Start split long ip ranges to C ip ranges")
        if self.country_flag:
            # country data
            ip_ranges = inputipdata.res
            log = "开始处理地区IP数据"
            self._logger.debug("Get region ip ranges")
            self._outprglog(log)
        else:
            ip_ranges = inputipdata
            log = "开始处理指定IP数据"
            self._logger.debug("Get specific ipranges")
            self._outprglog(log)
        # holds intranet (reserved) addresses
        intranet = []
        for el in ip_ranges:
            # handle string ranges such as 1.1.0.0/24
            if isinstance(el, str):
                if (
                    not self.country_flag
                    and IpProcessTools.judge_reserved_ip_addresses(el)
                ):
                    intranet.append(el)
                    self.__c_ipranges_count += 1
                else:
                    try:
                        low_ip = IPy.IP(el)
                        if low_ip.prefixlen() < 24:
                            count = 0
                            for ip in low_ip:
                                if count == 0:
                                    self.__c_ipranges_count += 1
                                    ipstr_list = [str(ip) + "/24"]
                                    self.hosts[tuple(ipstr_list)] = 1
                                count += 1
                                if count == 256:
                                    # a full /24 block has been consumed here
                                    count = 0
                        else:
                            self.__c_ipranges_count += 1
                            ipstr_list = [el]
                            self.hosts[tuple(ipstr_list)] = 1
                    except:
                        # not an ip range; could be a domain or something else
                        self.__c_ipranges_count += 1
                        ipstr_list = [el]
                        self.host[tuple(ipstr_list)] = 1
            elif isinstance(el, tuple):
                # tuple ranges only come from country data, so no de-dup is needed
                # try handling them this way for the masscan scan
                st = IPy.IP(el[0])
                sp = IPy.IP(el[1])
                # simply appending /24 is inaccurate because many returned ranges do not span 255 IPs
                # such ranges are handled separately for now, at the cost of more ips to query, modify by judy 2020/07/22
                if sp.int() - st.int() >= 255:
                    count = 0
                    for i in range(st.int(), sp.int() + 1):
                        if count == 0:
                            self.__c_ipranges_count += 1
                            ipstr: str = IPy.IP(i).strNormal()
                            if ipstr.endswith(".0"):
                                ipstr_list = [(ipstr + "/24")]
                                self.hosts[tuple(ipstr_list)] = 1
                        count += 1
                        if count == 256:
                            count = 0
                else:
                    iptmp = []
                    for i in range(st.int(), sp.int() + 1):
                        o_ipstr: str = IPy.IP(i).strNormal()
                        if o_ipstr.endswith(".0"):
                            continue
                        iptmp.append(o_ipstr)
                    # this also counts as one ip range; this was what broke the progress calculation earlier
                    self.__c_ipranges_count += 1
                    self.host[tuple(iptmp)] = 1

            else:
                raise Exception("Unsupported type")

        # add intranet scanning
        if len(intranet) > 0:
            for oneport in self.task.cmd.stratagyscan.scan.ports:
                with self._file_locker:
                    nmap_scan_host_path = self.tmppath / f"{str(uuid.uuid1())}"
                for shost in intranet:
                    with nmap_scan_host_path.open("a", encoding="utf-8") as fp:
                        fp.write(shost + "\n")
                self.nmap_queue.put((nmap_scan_host_path, [oneport]))

            self._logger.debug(f"本次一共扫描内网{len(intranet)}个目标")

        log = f"一共需要扫描{self.__c_ipranges_count}个IP C段"
        with open("./scan_rate_test_result.txt", "a", encoding="utf-8") as fp:
            fp.write(log + "\n")
        self._logger.info(f"Get {self.__c_ipranges_count} ip ranges")
        self._outprglog(log)
        # for country-derived ranges, call the completion callback to close the query connection
        if self.country_flag:
            inputipdata.sedone()
        self._logger.debug("Complete split long ip ranges to C ip ranges")

    def get_country_iprange(self, countryinfo: dict):
        """
        Get the ip ranges for a two-letter country code
        by selecting the relevant data from dbip
        :return:
        """
        self._logger.debug("Start get local ip ranges result")
        ip_rangs = None
        try:
            country = countryinfo.get("country")
            province = countryinfo.get("province")
            city = countryinfo.get("city")
            geoid = countryinfo.get("citycode")
            # sa = requests.session()
            # # open a session to fetch the home page and pick up headers and cookies
            # sa.get("http://ipblock.chacuo.net/")
            # # only /24 (C-class) networks are considered here; /8 and /16 may be added later
            # res = sa.get(f'http://ipblock.chacuo.net/down/t_txt=c_{country_code}')
            # res_text = res.text
            # ip_rangs = self.re_iprang.findall(res_text)
            # self._logger.info(f"Start get {country_code} ip range")
            ip_rangs = GeoIPLoc.get_location_ipranges(country, province, city, geoid)
            # This always returns a seres object; the error handling is there to keep sqlite from opening
            # too many connections. The code has never failed here, but handle errors just in case.
            self._logger.info("Complete get local ip ranges result")
        except:
            self._logger.error(
                f"Get country ip rangs error, err:{traceback.format_exc()}"
            )
        return ip_rangs

    def _download_data(self) -> iter:
        """
        下载数据接口,最后返回的数据为dict,
        这里是数据下载流程的开始,新增暂停功能
        modify by judy 2020/06/03
        :return:
        """
        # 为了计算进度,一定是先处理host再处理port
        self.process_host()
        self.process_port()
        # Keep polling for the stop flag
        # t = threading.Thread(target=self._get_stop_sign, name="stop_sign_scan")
        # t.start()
        # 1. Use zmap to quickly discover open ports
        # To run worker threads, the work has to go through queues shared by the running threads
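        # Pipeline wired up below: make_zmap_scan_queue -> zmap_queue -> zmap_scan
        # -> nmap_queue -> nmap_scan -> zgrab2_queue -> zgrab2_scan
        # -> vulns_queue -> vulns_scan -> output_res_queue -> output_res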
        mzsq = threading.Thread(
            target=self.make_zmap_scan_queue, name="make_zmap_scan_queue"
        )
        mzsq.start()
        for i in range(max_zscan_threads):
            t = threading.Thread(target=self.zmap_scan, name=f"zmap_threads{i}")
            t.start()
        for j in range(max_nscan_threads):
            jthread = threading.Thread(target=self.nmap_scan, name=f"scan_threads{j}")
            jthread.start()

        for m in range(max_zgrab2_threads):
            mthread = threading.Thread(
                target=self.zgrab2_scan, name=f"zgrab2_threads{m}"
            )
            mthread.start()

        for n in range(max_vulns_threads):
            nthreads = threading.Thread(
                target=self.vulns_scan, name=f"vulns_threads{n}"
            )
            nthreads.start()
        # t = threading.Thread(target=self._scan_status, name="Monitor scan status")
        # t.start()
        ossq = threading.Thread(target=self.output_res, name=f"output_result")
        ossq.start()
        ossq.join()
        # The program has finished running
        self._running = False
        # When scanning finishes, report 100%
        self._logger.info("All scan complete")
        self.task.progress = 1
        self._write_iscantaskback(ECommandStatus.Dealing, "Scan complete: 100%")
        log = f"This IP probing task is complete; {self.output_count} records were collected in total"
        self._outprglog(log)
        return
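        # Note: the unreachable `yield None` below keeps this method a generator,
        # presumably so callers can iterate it like other download interfaces even
        # though it never actually yields any data.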
        yield None

    def make_zmap_scan_queue(self):
        """
        生成zmap扫描的队列,主要是为了保证速率
        modify by judy 2020/06/03
        如果停止了那么也就不继续制作zmap扫描数据了
        port :
        host :list []
        :return:
        """
        self._logger.info("Start make zmap scan data and insert to zmap queue")
        self.make_zmap_scan_queue_status = True
        save_port_count = None
        save_host_count = None
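        # Resume checkpoint: self.sp stores two space-separated counters,
        # "<ports processed> <hosts processed>", written while queuing work
        # below and read here to skip already-handled entries after a restart.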
        if self.sp.exists():
            save_str = self.sp.read_text()
            if save_str is not None and save_str != "":
                save_list = save_str.split(" ")
                save_port_count = int(save_list[0])
                save_host_count = int(save_list[1])
        # This checkpoint data is only used once
        host_count = 0
        port_count = 0
        got = False
        while True:
            if self.port_queue.empty() or self._stop_sign:
                # 运行结束
                self.make_zmap_scan_queue_status = False
                self._logger.info("Complete make zmap scan data")
                break
            got = False
            port = self.port_queue.get()
            port_count += 1
            got = True
            if save_port_count is not None and save_port_count > port_count:
                continue
            elif save_port_count == port_count:
                # Found the port to resume from, so clear the saved value
                self._logger.info(
                    f"Continue download, skip {save_port_count} port, start from port:{port.port}"
                )
                save_port_count = None
                pass
            tmp_hosts = []
            try:
                # Class C IP ranges
                for host in self.hosts.keys():
                    host_count += 1
                    if (
                        save_host_count is not None
                        and save_host_count > 0
                        and save_host_count > host_count
                    ):
                        continue
                    elif save_host_count == host_count:
                        self._logger.info(
                            f"Continue download, skip {save_host_count} host, start from host:{host}"
                        )
                        save_host_count = None
                        pass
                    # Record which host the scan has reached
                    line = f"{port_count} {host_count-self.zmap_queue.qsize()}"
                    self.sp.write_text(line)
                    # self._logger.info(f"Write line:{line}")
                    # Convert the unique tuple back into a list
                    host = list(host)
                    tmp_hosts.extend(host)
                    if len(tmp_hosts) > max_zscan_ipranges:
                        while self.zmap_queue.qsize() > max_zscan_threads * 10:
                            self._logger.debug(
                                f"Zmap scan queue over {max_zscan_threads*10}, too many objects to scan, wait 10 second"
                            )
                            time.sleep(10)
                        self.zmap_queue.put((tmp_hosts, port))
                        # Reset tmp_hosts
                        tmp_hosts = []
                # Individual IPs or hosts
                for ip in self.host.keys():
                    host_count += 1
                    if (
                        save_host_count is not None
                        and save_host_count > 0
                        and save_host_count > host_count
                    ):
                        continue
                    elif save_host_count == host_count:
                        self._logger.info(
                            f"Continue download, skip {save_host_count} host, start from host:{ip}"
                        )
                        save_host_count = None
                        pass
                    # Record which host the scan has reached
                    line = f"{port_count} {host_count-self.zmap_queue.qsize()}"
                    self.sp.write_text(line)
                    host = list(ip)
                    tmp_hosts.extend(host)
                    if len(tmp_hosts) > max_zscan_ip:
                        while self.zmap_queue.qsize() > max_zscan_threads * 10:
                            self._logger.debug(
                                f"Zmap scan queue over {max_zscan_threads*10}, too many objects to scan, wait 10 second"
                            )
                            time.sleep(10)
                        self.zmap_queue.put((tmp_hosts, port))
                        # Reset tmp_hosts
                        tmp_hosts = []
            except:
                self._logger.error(
                    f"Put hosts port to zmap scan queue error\nport:{port.port}\nerror:{traceback.format_exc()}"
                )
            finally:
                # After finishing, check whether any hosts remain for this port
                if len(tmp_hosts) > 0:
                    self.zmap_queue.put((tmp_hosts, port))
                if got:
                    self.port_queue.task_done()
        # Manually release the dicts once this function has finished, by swm 20201012
        self.hosts = None
        self.host = None

    def _make_back_progress(self):
        """
        扫描进度的回馈
        这个扫描进度,不应该考虑到扫描整个国家的情况
        那就直接在imap里面算,
        但是这个东西是并行的怎么算
        唉,先写着用用吧,by Judy 2020/04/07
        :return:
        """
        try:
            progress = round(self.__has_scan_count / self.__all_scan_count, 2)
            if progress - self.__progress > 0.001:
                # The progress estimate is not accurate and can sometimes exceed 1
                if progress > 1.0:
                    progress = 0.999
                self.__progress = progress
                self.task.progress = progress
                self._logger.info(f"Scan progress:{float(progress * 100)}%")
                self._outprglog(f"正在扫描:{float(progress * 100)}%")
                self._write_iscantaskback(
                    ECommandStatus.Dealing, f"正在扫描:{float(progress * 100)}%"
                )
        except:
            self._logger.error(f"Make progress error, err:{traceback.format_exc()}")
        return

    def _is_complete(self, scan_queue, scan_thread_state) -> bool:
        """
        这里判断任务是否完成,程序全部执行完成了返回true
        任务没有执行完成返回false
        modify by judy 2020/06/03
        如果任务被中途暂停了那么直接停止
        :return:
        """
        if self._stop_sign:
            return True

        complete = False
        # A length of 0 means the upstream threads have not started yet
        if len(scan_thread_state) == 0:
            return complete

        if scan_queue.empty() and True not in scan_thread_state.values():
            # The queue is empty and no task is still running
            complete = True
        return complete

    def zmap_scan(self):
        """
        使用zmap来发现存活的端口
        zmap和masscan的效果类似,现在尝试使用masscan来扫描
        换了试试
        :return:
        """
        # Unique id of the current thread; scanning starts as soon as it enters
        ident = threading.current_thread().ident
        cur_state = True
        self.__zmap_scan_thread_state[ident] = cur_state
        got = False
        while True:
            # End of run
            if (
                not self.make_zmap_scan_queue_status and self.zmap_queue.empty()
            ) or self._stop_sign:
                # All ports have been scanned
                cur_state = False
                self.__zmap_scan_thread_state[ident] = cur_state
                self._logger.info(f"Zmap {ident} Scan complete")
                break

            if self.zmap_queue.empty():
                time.sleep(1)
                continue
            got = False
            hosts, t_port = self.zmap_queue.get()
            got = True
            # Check here in case the entries are domains
            new_host, ip_domain_dict = IpProcessTools.judge_ip_or_domain(hosts)
            if len(new_host) == 0:
                self._logger.debug(f"Get no live hosts")
                continue
            log = f"开始探测{len(hosts)}个主机存活和端口开放情况, PORT:{t_port.port}, Protocol:{t_port.flag}"
            self._outprglog(log)
            # zmap scans quickly, so it can be given more IP ranges per run, but the command line cannot hold that many; pass them via a file, and remember to create and delete the file, by judy 2020/08/20
            with self._file_locker:
                zmap_scan_host_path = self.tmppath / f"{str(uuid.uuid1())}"
            for shost in new_host:
                with zmap_scan_host_path.open("a", encoding="utf-8") as fp:
                    fp.write(shost + "\n")
            self._logger.debug(
                f"Start Zmap thread scan an object, zmap thread id: {ident}"
            )
            try:
                for port_info in self.zmap.scan_open_ports(
                    self.task, 1, zmap_scan_host_path, [t_port]
                ):
                    if port_info is None:
                        continue
                    ip = port_info._host
                    o_host = ip_domain_dict.get(ip)
                    if o_host is not None:
                        zmapres = o_host
                    else:
                        zmapres = ip
                    with self._nmap_tmp_dict_locker:
                        self.process_nmap_data(zmapres, t_port)
                self.__has_scan_count += 1
            except:
                self._logger.error(
                    f"Zmap scan port error\nport:{t_port.port} protocol:{t_port.flag}\nerror:{traceback.format_exc()}"
                )
            finally:
                if got:
                    self.zmap_queue.task_done()
                if zmap_scan_host_path.exists():
                    zmap_scan_host_path.unlink()
                self._logger.debug(f"Zmap {ident} complete scan an object")
        # After breaking out of the loop
        if True not in self.__zmap_scan_thread_state.values():
            with self._nmap_tmp_dict_locker:
                self.process_nmap_data(None, None, True)

    def process_nmap_data(self, ip, port, zscan_stop_flag=False):
        """
        Process data destined for nmap.
        Accumulate IPs up to a threshold before handing them to the nmap scan,
        so that nmap's scan rate stays balanced.
        """
        if ip is not None and port is not None:
            if self._nmap_tmp.__contains__(port):
                self._nmap_tmp[port].append(ip)
                if len(self._nmap_tmp.get(port)) >= max_nmap_ip:
                    with self._file_locker:
                        nmap_scan_path = self.tmppath / f"{str(uuid.uuid1())}"
                    with nmap_scan_path.open("a", encoding="utf-8") as fp:
                        fp.writelines([ip + "\n" for ip in self._nmap_tmp.get(port)])

                    # The queue should not be allowed to build up too much
                    while self.nmap_queue.qsize() > max_nscan_threads * 10:
                        self._logger.debug(
                            f"Nmap scan queue over {max_nscan_threads*10}, too many objects to scan, wait 20 seconds"
                        )
                        time.sleep(10)
                    self.nmap_queue.put((nmap_scan_path, port))
                    # Remove the entry after it has been queued
                    self._nmap_tmp.pop(port)
            else:
                self._nmap_tmp[port] = [ip]
        # Check whether zmap has finished
        if zscan_stop_flag:
            for port, ipranges in self._nmap_tmp.items():
                with self._file_locker:
                    nmap_scan_path = self.tmppath / f"{str(uuid.uuid1())}"
                with nmap_scan_path.open("a", encoding="utf-8") as fp:
                    fp.writelines([ip + "\n" for ip in ipranges])
                self.nmap_queue.put((nmap_scan_path, port))
            # Clear this buffer after all nmap work has been queued
            self._nmap_tmp = {}
        return

    def nmap_scan(self):
        """
        nmap
        这里会使用多线程去处理已经查到开放了的端口
        :return:
        """
        ident = threading.current_thread().ident
        cur_state = True
        self.__nmap_scan_thread_state[ident] = cur_state
        got = False
        while True:
            # Exit when scanning is complete
            if self._is_complete(self.nmap_queue, self.__zmap_scan_thread_state):
                cur_state = False
                self.__nmap_scan_thread_state[ident] = cur_state
                self._logger.info(f"Nmap {ident} scan complete")
                break
            # Check whether anything is left in the queue
            if self.nmap_queue.empty():
                time.sleep(1)
                continue
            got = False
            ips_path, port = self.nmap_queue.get()
            got = True
            log = f"开始探测主机协议: PORT:{port.port} protocol:{port.flag}"
            self._logger.info(
                f"Start nmap {ips_path.as_posix()}, port:{port.port} protocol:{port.flag}"
            )
            self._outprglog(log)
            try:
                tmp_zgrab2_dict = {}
                self._logger.debug(f"Start Nmap scan an object, nmap thread id:{ident}")
                for portinfo in self.nmap.scan_open_ports_by_file(
                    self.task, 1, ips_path.as_posix(), [port], outlog=self._outprglog
                ):
                    if not isinstance(portinfo, PortInfo):
                        continue
                    # Everything coming out here belongs to a single port; fortunately it gets deduplicated here
                    tmp_zgrab2_dict[portinfo._host] = portinfo

                if len(tmp_zgrab2_dict) > 0:
                    while self.zgrab2_queue.qsize() > max_zgrab2_threads * 10:
                        # Periodically check how the queue is being processed
                        self._logger.debug(
                            f"Nmap Threading id:{ident},Zgrab2 scan queue over {max_zgrab2_threads*10}, too many objects to scan, wait 20 second"
                        )
                        time.sleep(20)
                    self.zgrab2_queue.put((tmp_zgrab2_dict, port))
                    log = f"探测主机协议完成:{list(tmp_zgrab2_dict.keys())}"
                    self._logger.info(
                        f"Get nmap result {tmp_zgrab2_dict.__len__()} ips and put into zgrab2"
                    )
                    self._outprglog(log)
            except:
                self._logger.error(
                    f"Nmap scan port info error, id:{ident}, err:{traceback.format_exc()}"
                )
            finally:
                if got:
                    self.nmap_queue.task_done()
                    # Report progress back
                    self._make_back_progress()
                    # Delete the file
                    try:
                        if ips_path.exists():
                            ips_path.unlink()
                    except:
                        self._logger.error(
                            f"Delete zmap res path error, err:{traceback.format_exc()}"
                        )
                    self._logger.debug(
                        f"Complete Nmap scan an object, nmap thread id:{ident}"
                    )

    def zgrab2_scan(self):
        """
        zgrab2扫描
        port:PortInfo
        :return:
        """
        # Unique id of the current thread; scanning starts as soon as it enters
        ident = threading.current_thread().ident
        cur_state = True
        self.__zgrab2_scan_thread_state[ident] = cur_state
        got = False
        while True:
            # End of run
            if self._is_complete(self.zgrab2_queue, self.__nmap_scan_thread_state):
                cur_state = False
                self.__zgrab2_scan_thread_state[ident] = cur_state
                self._logger.info(f"Zgrab2 {ident} scan complete")
                break
            if self.zgrab2_queue.empty():
                time.sleep(0.1)
                continue
            got = False
            portinfo_dict, port = self.zgrab2_queue.get()
            got = True
            log = f"开始协议详情探测:{list(portinfo_dict.keys())}"
            self._logger.info(
                f"Start zgrab2 scan {len(portinfo_dict)} ips, zgrab2 thread id:{ident}"
            )
            self._outprglog(log)
            try:
                self._scan_application_protocol(1, portinfo_dict, port)
                self._logger.debug(f"Complete Zgrab2 scan, zgrab2 thread id:{ident}")
                for portinfo in portinfo_dict.values():
                    while self.vulns_queue.qsize() > max_vulns_threads * 10:
                        self._logger.debug(
                            f"Zgrab2 threading id:{ident},Vulns scan queue over {max_vulns_threads*10} objects, too many data, wait 20 second"
                        )
                        time.sleep(20)
                    self.vulns_queue.put(portinfo)
                log = f"协议详情探测: 获取到{len(portinfo_dict)}个结果"
                self._logger.info(f"Put {len(portinfo_dict)} objects to vuls queue")
                self._outprglog(log)
            except Exception as err:
                self._logger.error(f"Zgrab2 scans error, err:{err}")
            finally:
                # Manually release the dict object
                if got:
                    portinfo_dict = None
                    self.zgrab2_queue.task_done()

    def _scan_application_protocol(self, level: int, port_info_dict, port):
        """
        根据 portinfo 的协议类型,扫描其应用层协议
        增加效率,每次扫描一个网段的数据,不再去扫描一个单一的那样太慢了
        """
        try:
            # This port is tied directly to the port object processed at the very start, so just take it, modify by judy
            port = port.port
            # Do an initial scan pass:
            if port != 80:
                self.zgrab2.get_tlsinfo(
                    self.task, level, port_info_dict, port, outlog=self._outprglog
                )
            tmpdict = {}
            # Ports without an identified protocol
            portdict = {}
            # Classify by protocol
            for k, v in port_info_dict.items():
                # k is the ip, v is the portinfo
                service = v.service
                if service is not None:
                    ser_dict = tmpdict.get(service)
                    # Check whether a dict for this service type exists yet
                    if ser_dict is None:
                        tmpdict[service] = {}
                    # Add the entry
                    tmpdict[service][k] = v
                else:
                    # Dict of entries that only have a port and no protocol
                    portdict[k] = v
            # Scan application-layer protocols for port-only entries
            self._scan_port_application(level, portdict, port)
            # Take each protocol class and scan it
            for service, service_dict in tmpdict.items():
                if service == "ftp":
                    self.zgrab2.get_ftp_info(
                        self.task, level, service_dict, port, outlog=self._outprglog
                    )
                elif service == "ssh":
                    self.zgrab2.get_ssh_info(
                        self.task, level, service_dict, port, outlog=self._outprglog
                    )
                elif service == "telnet":
                    self.zgrab2.get_telnet_info(
                        self.task, level, service_dict, port, outlog=self._outprglog
                    )
                elif service == "smtp":
                    self.zgrab2.get_smtp_info(
                        self.task, level, service_dict, port, outlog=self._outprglog
                    )
                elif service.__contains__("http") or service.__contains__("tcpwrapped"):
                    self.zgrab2.get_siteinfo(
                        self.task, level, service_dict, port, outlog=self._outprglog
                    )
                elif service == "pop3":
                    self.zgrab2.get_pop3_info(
                        self.task, level, service_dict, port, outlog=self._outprglog
                    )
                elif service == "ntp":
                    self.zgrab2.get_ntp_info(
                        self.task, level, service_dict, port, outlog=self._outprglog
                    )
                elif service == "imap":
                    self.zgrab2.get_imap_info(
                        self.task, level, service_dict, port, outlog=self._outprglog
                    )
                elif service == "mssql":
                    self.zgrab2.get_mssql_info(
                        self.task, level, service_dict, port, outlog=self._outprglog
                    )
                elif service == "redis":
                    self.zgrab2.get_redis_info(
                        self.task, level, service_dict, port, outlog=self._outprglog
                    )
                elif service == "mongodb":
                    self.zgrab2.get_mongodb_info(
                        self.task, level, service_dict, port, outlog=self._outprglog
                    )
                elif service == "mysql":
                    self.zgrab2.get_mysql_info(
                        self.task, level, service_dict, port, outlog=self._outprglog
                    )
                elif service == "oracle":
                    self.zgrab2.get_oracle_info(
                        self.task, level, service_dict, port, outlog=self._outprglog
                    )
                else:
                    # Protocols not covered above
                    self._scan_port_application(level, service_dict, port)
        except:
            self._logger.error(
                "Scan ip port application protocol error:\ntaskid:{}\nerror:{}".format(
                    self.task.taskid, traceback.format_exc()
                )
            )

    def _scan_port_application(self, level, portdict, port):
        """
        这个主要是为了扫描一些没有协议的端口,或者协议没在上面那个方法里的
        :return:
        """
        if len(portdict) == 0:
            return
        if port == 21:
            self.zgrab2.get_ftp_info(self.task, level, portdict, port)
        elif port == 22:
            self.zgrab2.get_ssh_info(self.task, level, portdict, port)
        elif port == 23:
            self.zgrab2.get_telnet_info(self.task, level, portdict, port)
        elif port == 25 or port == 465:
            self.zgrab2.get_smtp_info(self.task, level, portdict, port)
        elif port == 80 or port == 443:
            self.zgrab2.get_siteinfo(self.task, level, portdict, port)
        elif port == 110 or port == 995:
            self.zgrab2.get_pop3_info(self.task, level, portdict, port)
        elif port == 123:
            self.zgrab2.get_ntp_info(self.task, level, portdict, port)
        elif port == 143 or port == 993:
            self.zgrab2.get_imap_info(self.task, level, portdict, port)
        elif port == 1433:
            self.zgrab2.get_mssql_info(self.task, level, portdict, port)
        elif port == 6379:
            self.zgrab2.get_redis_info(self.task, level, portdict, port)
        elif port == 27017:
            self.zgrab2.get_mongodb_info(self.task, level, portdict, port)
        elif port == 3306:
            self.zgrab2.get_mysql_info(self.task, level, portdict, port)
        elif port == 1521:
            self.zgrab2.get_oracle_info(self.task, level, portdict, port)

    def vulns_scan(self):
        """
        漏洞扫描,回去扫描某个ip的具体页面
        由于是http连接非常耗性能,因此页面做了勾选和筛选
        """
        # Unique id of the current thread; scanning starts as soon as it enters
        ident = threading.current_thread().ident
        cur_state = True
        self.__vulns_scan_thread_state[ident] = cur_state
        got = False
        while True:
            # End of run
            if self._is_complete(self.vulns_queue, self.__zgrab2_scan_thread_state):
                cur_state = False
                self.__vulns_scan_thread_state[ident] = cur_state
                self._logger.info(f"Vulns {ident} scan complete")
                break
            if self.vulns_queue.empty():
                time.sleep(0.1)
                continue
            got = False
            portinfo = self.vulns_queue.get()
            got = True
            if len(self._vulns_list) > 0:
                log = f"开始漏洞扫描: {self._vulns_list}"
                self._logger.debug(
                    f"Start vulns scan {self._vulns_list}, vulns threading id:{ident}"
                )
                self._outprglog(log)
                self.logicalgrabber.grabbanner(
                    portinfo, self._vulns_list, flag="iscan", outlog=self._outprglog
                )
            try:
                self.output_res_queue.put(portinfo)
            except Exception as err:
                self._logger.error(f"vulns scan error, err:{err}")
            finally:
                if got:
                    self.vulns_queue.task_done()
                    if len(self._vulns_list) > 0:
                        self._logger.debug(
                            f"Stop vulns scan {self._vulns_list}, vulns threading id:{ident}"
                        )

    def output_res(self):
        """
        结果输出线程
        :return:
        """
        self._logger.info(f"Start output result thread")
        got = False
        while True:
            # Finished
            if self._is_complete(self.output_res_queue, self.__vulns_scan_thread_state):
                self._logger.info(f"Complete output result thread")
                # On normal completion, delete the scan progress file for this task
                self.sp.unlink()
                break

            if self.output_res_queue.empty():
                time.sleep(0.1)
                continue
            try:
                got = False
                portinfo: PortInfo = self.output_res_queue.get()
                got = True
                file_port = portinfo._port
                ip = portinfo._host
                root: IP = IP(self.task, 1, ip)
                root.set_portinfo(portinfo)
                geoinfo, org, isp = self.dbip.get_ip_mmdbinfo(level=1, ip=ip)
                country_code = "unknown"
                if isinstance(geoinfo, GeoInfo):
                    root.set_geolocation(geoinfo)
                    country_code = geoinfo._country_code
                root.org = org
                root.isp = isp
                if root._subitem_count() > 0:
                    out_dict = root.get_outputdict()
                    # Output lock to avoid interleaved writes; the output path seems to have its own lock already, so first test without a lock and see whether problems show up
                    # with self._file_locker:
                    file_name = f"{country_code}_{file_port}_{int(time.time() * 1000)}"
                    self._outputdata(out_dict, file_name=file_name)
                    self.output_count += 1
                if isinstance(portinfo, PortInfo):
                    del portinfo
            except:
                self._logger.error(f"Output result error: {traceback.format_exc()}")
            finally:
                if got:
                    self.output_res_queue.task_done()
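
The scanner above fans work out through a chain of Queues and treats a stage as
finished only when its input queue is empty and every upstream worker has flagged
itself done (see _is_complete). The following is a minimal, self-contained sketch
of that hand-off pattern; the names (produce, stage_worker, stage_is_complete,
upstream_state) are illustrative and not part of the original project.

import threading
import time
from queue import Queue, Empty

work_queue = Queue()
upstream_state = {}  # thread ident -> still producing?

def produce(n):
    ident = threading.get_ident()
    upstream_state[ident] = True
    for i in range(n):
        work_queue.put(i)
    upstream_state[ident] = False  # tell downstream that production ended

def stage_is_complete(queue, states):
    # Mirrors _is_complete: done once the queue is drained and no upstream
    # thread is still marked as running (an empty dict means "not started").
    return len(states) > 0 and queue.empty() and True not in states.values()

def stage_worker(results):
    while True:
        if stage_is_complete(work_queue, upstream_state):
            break
        try:
            item = work_queue.get_nowait()
        except Empty:
            time.sleep(0.01)
            continue
        try:
            results.append(item * 2)  # stand-in for a real scan step
        finally:
            work_queue.task_done()

results = []
producer = threading.Thread(target=produce, args=(100,))
workers = [threading.Thread(target=stage_worker, args=(results,)) for _ in range(4)]
producer.start()
for w in workers:
    w.start()
producer.join()
for w in workers:
    w.join()
print(len(results))  # 100
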
Example #43
0
class Ventilator(Thread):
    def __init__(self, host, sync_port, sent_list):
        Thread.__init__(self)
        self.host = host
        self.sent_list = sent_list
        logging.debug("Job distributer attempting to bind to PUSH socket...")
        context = zmq.Context()
        self.socket = context.socket(zmq.REP)
        self.port = self.socket.bind_to_random_port("tcp://" + self.host)

        logging.debug("Ventilator successfully bound to PUSH socket.")

        self.sync_socket = context.socket(zmq.REQ)
        self.sync_socket.connect("tcp://%s:%s" % (self.host, sync_port))
        logging.debug("Ventilator connected to sync socket.")

        self.job_queue = Queue()

    def run(self):
        while True:
            ## Wait for signal to start:
            logging.debug("Ventilator waiting for permission to start")
            self.sync_socket.send(b'0')
            sync = self.sync_socket.recv_pyobj()

            if sync == b'0':
                break
            else:
                current_resource_sig = sync

            logging.debug("Ventilator received model signature sync signal")
            while not self.job_queue.empty():
                job_request = self.socket.recv_pyobj()
                worker_resource_sig = job_request.resource_sig

                if not resource_current(current_resource_sig,
                                        worker_resource_sig):
                    ## Send them a quit message:
                    quit_job = PyzmqJob(PyzmqJob.QUIT, None)
                    if job_request.request_size > 1:
                        quit_job = [quit_job]

                    self.socket.send_pyobj(quit_job)
                    continue

                jobs = []

                while not self.job_queue.empty() and len(
                        jobs) < job_request.request_size:
                    job = self.job_queue.get()
                    jobs.append(job)
                    self.job_queue.task_done()

                logging.log(logging.DEBUG - 1,
                            "Ventilator pushing job %d" % job.resource.index)

                if job_request.request_size > 1:
                    self.socket.send_pyobj(jobs)
                else:
                    self.socket.send_pyobj(jobs[0])

            logging.debug("Ventilator iteration finishing")

        logging.debug("Ventilator thread finishing")
        self.socket.close()
        self.sync_socket.close()
        logging.debug("All ventilator sockets closed.")

    def addJob(self, job):
        self.job_queue.put(job)
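
In Example #43 the Ventilator answers worker REQ requests with jobs pulled from its
local Queue. Below is a stripped-down sketch of that REP-side loop; serve_jobs and
the dict payload are illustrative stand-ins, not the PyzmqJob API used above, and a
worker would connect a REQ socket to the printed port and exchange pyobj messages.

import zmq
from queue import Queue

def serve_jobs(host="127.0.0.1", n_jobs=10):
    context = zmq.Context()
    socket = context.socket(zmq.REP)
    port = socket.bind_to_random_port("tcp://" + host)
    print("serving jobs on port", port)

    job_queue = Queue()
    for i in range(n_jobs):
        job_queue.put({"index": i})

    while not job_queue.empty():
        socket.recv_pyobj()        # a worker asks for work
        job = job_queue.get()
        socket.send_pyobj(job)     # reply with the next job
        job_queue.task_done()

    socket.recv_pyobj()            # one more request will arrive eventually...
    socket.send_pyobj(None)        # ...answer it with a quit marker
    socket.close()
    context.term()
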
Example #44
0
class printcore():
    def __init__(self, port=None, baud=None, dtr=None):
        """Initializes a printcore instance. Pass the port and baud rate to
           connect immediately"""
        self.baud = None
        self.dtr = None
        self.port = None
        self.analyzer = gcoder.GCode()
        # Serial instance connected to the printer, should be None when
        # disconnected
        self.printer = None
        # clear to send, enabled after responses
        # FIXME: should probably be changed to a sliding window approach
        self.clear = 0
        # The printer has responded to the initial command and is active
        self.online = False
        # is a print currently running, true if printing, false if paused
        self.printing = False
        self.mainqueue = None
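        # Out-of-band command queue fed by send()/send_now() and drained by _sender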
        self.priqueue = Queue(0)
        self.queueindex = 0
        self.lineno = 0
        self.resendfrom = -1
        self.paused = False
        self.sentlines = {}
        self.log = deque(maxlen=10000)
        self.sent = []
        self.writefailures = 0
        self.tempcb = None  # impl (wholeline)
        self.recvcb = None  # impl (wholeline)
        self.sendcb = None  # impl (wholeline)
        self.preprintsendcb = None  # impl (wholeline)
        self.printsendcb = None  # impl (wholeline)
        self.layerchangecb = None  # impl (wholeline)
        self.errorcb = None  # impl (wholeline)
        self.startcb = None  # impl ()
        self.endcb = None  # impl ()
        self.onlinecb = None  # impl ()
        self.loud = False  # emit sent and received lines to terminal
        self.tcp_streaming_mode = False
        self.greetings = ['start', 'Grbl ']
        self.wait = 0  # default wait period for send(), send_now()
        self.read_thread = None
        self.stop_read_thread = False
        self.send_thread = None
        self.stop_send_thread = False
        self.print_thread = None
        self.readline_buf = []
        self.selector = None
        self.event_handler = PRINTCORE_HANDLER
        # Not all platforms need to do this parity workaround, and some drivers
        # don't support it.  Limit it to platforms that actually require it
        # here to avoid doing redundant work elsewhere and potentially breaking
        # things.
        self.needs_parity_workaround = platform.system(
        ) == "Linux" and os.path.exists("/etc/debian_version")
        for handler in self.event_handler:
            try:
                handler.on_init()
            except:
                logging.error(traceback.format_exc())
        if port is not None and baud is not None:
            self.connect(port, baud)
        self.xy_feedrate = None
        self.z_feedrate = None

    def addEventHandler(self, handler):
        '''
        Adds an event handler.
        
        @param handler: The handler to be added.
        '''
        self.event_handler.append(handler)

    def logError(self, error):
        for handler in self.event_handler:
            try:
                handler.on_error(error)
            except:
                logging.error(traceback.format_exc())
        if self.errorcb:
            try:
                self.errorcb(error)
            except:
                logging.error(traceback.format_exc())
        else:
            logging.error(error)

    @locked
    def disconnect(self):
        """Disconnects from printer and pauses the print
        """
        if self.printer:
            if self.read_thread:
                self.stop_read_thread = True
                if threading.current_thread() != self.read_thread:
                    self.read_thread.join()
                self.read_thread = None
            if self.print_thread:
                self.printing = False
                self.print_thread.join()
            self._stop_sender()
            try:
                if self.selector is not None:
                    self.selector.unregister(self.printer_tcp)
                    self.selector.close()
                    self.selector = None
                if self.printer_tcp is not None:
                    self.printer_tcp.close()
                    self.printer_tcp = None
                self.printer.close()
            except socket.error:
                logger.error(traceback.format_exc())
                pass
            except OSError:
                logger.error(traceback.format_exc())
                pass
        for handler in self.event_handler:
            try:
                handler.on_disconnect()
            except:
                logging.error(traceback.format_exc())
        self.printer = None
        self.online = False
        self.printing = False

    @locked
    def connect(self, port=None, baud=None, dtr=None):
        """Set port and baudrate if given, then connect to printer
        """
        if self.printer:
            self.disconnect()
        if port is not None:
            self.port = port
        if baud is not None:
            self.baud = baud
        if dtr is not None:
            self.dtr = dtr
        if self.port is not None and self.baud is not None:
            # Connect to socket if "port" is an IP, device if not
            host_regexp = re.compile(
                "^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])$|^(([a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9\-]*[a-zA-Z0-9])\.)*([A-Za-z0-9]|[A-Za-z0-9][A-Za-z0-9\-]*[A-Za-z0-9])$"
            )
            is_serial = True
            if ":" in self.port:
                bits = self.port.split(":")
                if len(bits) == 2:
                    hostname = bits[0]
                    try:
                        port_number = int(bits[1])
                        if host_regexp.match(
                                hostname) and 1 <= port_number <= 65535:
                            is_serial = False
                    except:
                        pass
            self.writefailures = 0
            if not is_serial:
                self.printer_tcp = socket.socket(socket.AF_INET,
                                                 socket.SOCK_STREAM)
                self.printer_tcp.setsockopt(socket.IPPROTO_TCP,
                                            socket.TCP_NODELAY, 1)
                self.timeout = 0.25
                self.printer_tcp.settimeout(1.0)
                try:
                    self.printer_tcp.connect((hostname, port_number))
                    #a single read timeout raises OSError for all later reads
                    #probably since python 3.5
                    #use non blocking instead
                    self.printer_tcp.settimeout(0)
                    self.printer = self.printer_tcp.makefile('rwb',
                                                             buffering=0)
                    self.selector = selectors.DefaultSelector()
                    self.selector.register(self.printer_tcp,
                                           selectors.EVENT_READ)
                except socket.error as e:
                    if (e.strerror is None): e.strerror = ""
                    self.logError(
                        _("Could not connect to %s:%s:") %
                        (hostname, port_number) + "\n" +
                        _("Socket error %s:") % e.errno + "\n" + e.strerror)
                    self.printer = None
                    self.printer_tcp = None
                    return
            else:
                disable_hup(self.port)
                self.printer_tcp = None
                try:
                    if self.needs_parity_workaround:
                        self.printer = Serial(port=self.port,
                                              baudrate=self.baud,
                                              timeout=0.25,
                                              parity=PARITY_ODD)
                        self.printer.close()
                        self.printer.parity = PARITY_NONE
                    else:
                        self.printer = Serial(baudrate=self.baud,
                                              timeout=0.25,
                                              parity=PARITY_NONE)
                        self.printer.port = self.port
                    try:  #this appears not to work on many platforms, so we're going to call it but not care if it fails
                        self.printer.dtr = dtr
                    except:
                        #self.logError(_("Could not set DTR on this platform")) #not sure whether to output an error message
                        pass
                    self.printer.open()
                except SerialException as e:
                    self.logError(
                        _("Could not connect to %s at baudrate %s:") %
                        (self.port, self.baud) + "\n" +
                        _("Serial error: %s") % e)
                    self.printer = None
                    return
                except IOError as e:
                    self.logError(
                        _("Could not connect to %s at baudrate %s:") %
                        (self.port, self.baud) + "\n" + _("IO error: %s") % e)
                    self.printer = None
                    return
            for handler in self.event_handler:
                try:
                    handler.on_connect()
                except:
                    logging.error(traceback.format_exc())
            self.stop_read_thread = False
            self.read_thread = threading.Thread(target=self._listen,
                                                name='read thread')
            self.read_thread.start()
            self._start_sender()

    def reset(self):
        """Reset the printer
        """
        if self.printer and not self.printer_tcp:
            self.printer.dtr = 1
            time.sleep(0.2)
            self.printer.dtr = 0

    def _readline_buf(self):
        "Try to readline from buffer"
        if len(self.readline_buf):
            chunk = self.readline_buf[-1]
            eol = chunk.find(b'\n')
            if eol >= 0:
                line = b''.join(self.readline_buf[:-1]) + chunk[:(eol + 1)]
                self.readline_buf = []
                if eol + 1 < len(chunk):
                    self.readline_buf.append(chunk[(eol + 1):])
                return line
        return PR_AGAIN

    def _readline_nb(self):
        "Non blocking readline. Socket based files do not support non blocking or timeouting readline"
        if self.printer_tcp:
            line = self._readline_buf()
            if line:
                return line
            chunk_size = 256
            while True:
                chunk = self.printer.read(chunk_size)
                if chunk is SYS_AGAIN and self.selector.select(self.timeout):
                    chunk = self.printer.read(chunk_size)
                #print('_readline_nb chunk', chunk, type(chunk))
                if chunk:
                    self.readline_buf.append(chunk)
                    line = self._readline_buf()
                    if line:
                        return line
                elif chunk is SYS_AGAIN:
                    return PR_AGAIN
                else:
                    #chunk == b'' means EOF
                    line = b''.join(self.readline_buf)
                    self.readline_buf = []
                    self.stop_read_thread = True
                    return line if line else PR_EOF
        else:  # serial port
            return self.printer.readline()

    def _readline(self):
        try:
            line_bytes = self._readline_nb()
            if line_bytes is PR_EOF:
                self.logError(
                    _("Can't read from printer (disconnected?). line_bytes is None"
                      ))
                return PR_EOF
            line = line_bytes.decode('utf-8')

            if len(line) > 1:
                self.log.append(line)
                for handler in self.event_handler:
                    try:
                        handler.on_recv(line)
                    except:
                        logging.error(traceback.format_exc())
                if self.recvcb:
                    try:
                        self.recvcb(line)
                    except:
                        self.logError(traceback.format_exc())
                if self.loud: logging.info("RECV: %s" % line.rstrip())
            return line
        except UnicodeDecodeError:
            self.logError(
                _("Got rubbish reply from %s at baudrate %s:") %
                (self.port, self.baud) + "\n" + _("Maybe a bad baudrate?"))
            return None
        except SelectError as e:
            if 'Bad file descriptor' in e.args[1]:
                self.logError(
                    _("Can't read from printer (disconnected?) (SelectError {0}): {1}"
                      ).format(e.errno, decode_utf8(e.strerror)))
                return None
            else:
                self.logError(
                    _("SelectError ({0}): {1}").format(e.errno,
                                                       decode_utf8(
                                                           e.strerror)))
                raise
        except SerialException as e:
            self.logError(
                _("Can't read from printer (disconnected?) (SerialException): {0}"
                  ).format(decode_utf8(str(e))))
            return None
        except socket.error as e:
            self.logError(
                _("Can't read from printer (disconnected?) (Socket error {0}): {1}"
                  ).format(e.errno, decode_utf8(e.strerror)))
            return None
        except OSError as e:
            if e.errno == errno.EAGAIN:  # Not a real error, no data was available
                return ""
            self.logError(
                _("Can't read from printer (disconnected?) (OS Error {0}): {1}"
                  ).format(e.errno, e.strerror))
            return None

    def _listen_can_continue(self):
        if self.printer_tcp:
            return not self.stop_read_thread and self.printer
        return (not self.stop_read_thread and self.printer
                and self.printer.is_open)

    def _listen_until_online(self):
        while not self.online and self._listen_can_continue():
            self._send("M105")
            if self.writefailures >= 4:
                logging.error(
                    _("Aborting connection attempt after 4 failed writes."))
                return
            empty_lines = 0
            while self._listen_can_continue():
                line = self._readline()
                if line is None: break  # connection problem
                # Workaround for cases where M105 was sent before the printer
                # serial port was online. An empty line means the read timeout
                # was reached, i.e. no data was received, so we count the empty
                # lines, and once we have seen 15 in a row we break and send a
                # new M105.
                # 15 was chosen because it gives the Gen7 bootloader enough time
                # to time out, and missed-M105 cases should be rare enough that
                # waiting a long time before resending is acceptable.
                if not line:
                    empty_lines += 1
                    if empty_lines == 15: break
                else: empty_lines = 0
                if line.startswith(tuple(self.greetings)) \
                   or line.startswith('ok') or "T:" in line:
                    self.online = True
                    for handler in self.event_handler:
                        try:
                            handler.on_online()
                        except:
                            logging.error(traceback.format_exc())
                    if self.onlinecb:
                        try:
                            self.onlinecb()
                        except:
                            self.logError(traceback.format_exc())
                    return

    def _listen(self):
        """This function acts on messages from the firmware
        """
        self.clear = True
        if not self.printing:
            self._listen_until_online()
        while self._listen_can_continue():
            line = self._readline()
            if line is None:
                logging.debug('_readline() is None, exiting _listen()')
                break
            if line.startswith('DEBUG_'):
                continue
            if line.startswith(tuple(self.greetings)) or line.startswith('ok'):
                self.clear = True
            if line.startswith('ok') and "T:" in line:
                for handler in self.event_handler:
                    try:
                        handler.on_temp(line)
                    except:
                        logging.error(traceback.format_exc())
                if self.tempcb:
                    # callback for temp, status, whatever
                    try:
                        self.tempcb(line)
                    except:
                        self.logError(traceback.format_exc())
            elif line.startswith('Error'):
                self.logError(line)
            # Teststrings for resend parsing       # Firmware     exp. result
            # line="rs N2 Expected checksum 67"    # Teacup       2
            if line.lower().startswith("resend") or line.startswith("rs"):
                for haystack in ["N:", "N", ":"]:
                    line = line.replace(haystack, " ")
                linewords = line.split()
                while len(linewords) != 0:
                    try:
                        toresend = int(linewords.pop(0))
                        self.resendfrom = toresend
                        break
                    except:
                        pass
                self.clear = True
        self.clear = True
        logging.debug('Exiting read thread')

    def _start_sender(self):
        self.stop_send_thread = False
        self.send_thread = threading.Thread(target=self._sender,
                                            name='send thread')
        self.send_thread.start()

    def _stop_sender(self):
        if self.send_thread:
            self.stop_send_thread = True
            self.send_thread.join()
            self.send_thread = None

    def _sender(self):
        while not self.stop_send_thread:
            try:
                command = self.priqueue.get(True, 0.1)
            except QueueEmpty:
                continue
            while self.printer and self.printing and not self.clear:
                time.sleep(0.001)
            self._send(command)
            while self.printer and self.printing and not self.clear:
                time.sleep(0.001)

    def _checksum(self, command):
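        # RepRap-style checksum: XOR of every character of the outgoing line
        # (the "N<lineno> <command>" prefix built in _send), appended after '*'
        # so the firmware can detect corrupted lines.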
        return reduce(lambda x, y: x ^ y, map(ord, command))

    def startprint(self, gcode, startindex=0):
        """Start a print, gcode is an array of gcode commands.
        returns True on success, False if already printing.
        The print queue will be replaced with the contents of the data array,
        the next line will be set to 0 and the firmware notified. Printing
        will then start in a parallel thread.
        """
        if self.printing or not self.online or not self.printer:
            return False
        self.queueindex = startindex
        self.mainqueue = gcode
        self.printing = True
        self.lineno = 0
        self.resendfrom = -1
        if not gcode or not gcode.lines:
            return True

        self.clear = False
        self._send("M110", -1, True)

        resuming = (startindex != 0)
        self.print_thread = threading.Thread(target=self._print,
                                             name='print thread',
                                             kwargs={"resuming": resuming})
        self.print_thread.start()
        return True

    def cancelprint(self):
        self.pause()
        self.paused = False
        self.mainqueue = None
        self.clear = True

    # run a simple script if it exists, no multithreading
    def runSmallScript(self, filename):
        if not filename: return
        try:
            with open(filename) as f:
                for i in f:
                    l = i.replace("\n", "")
                    l = l.partition(';')[0]  # remove comments
                    self.send_now(l)
        except:
            pass

    def pause(self):
        """Pauses the print, saving the current position.
        """
        if not self.printing: return False
        self.paused = True
        self.printing = False

        # ';@pause' in the gcode file calls pause from the print thread
        if not threading.current_thread() is self.print_thread:
            try:
                self.print_thread.join()
            except:
                self.logError(traceback.format_exc())

        self.print_thread = None

        # saves the status
        self.pauseX = self.analyzer.abs_x
        self.pauseY = self.analyzer.abs_y
        self.pauseZ = self.analyzer.abs_z
        self.pauseE = self.analyzer.abs_e
        self.pauseF = self.analyzer.current_f
        self.pauseRelative = self.analyzer.relative
        self.pauseRelativeE = self.analyzer.relative_e

    def resume(self):
        """Resumes a paused print."""
        if not self.paused: return False
        # restores the status
        self.send_now("G90")  # go to absolute coordinates

        xyFeed = '' if self.xy_feedrate is None else ' F' + str(
            self.xy_feedrate)
        zFeed = '' if self.z_feedrate is None else ' F' + str(self.z_feedrate)

        self.send_now("G1 X%s Y%s%s" % (self.pauseX, self.pauseY, xyFeed))
        self.send_now("G1 Z" + str(self.pauseZ) + zFeed)
        self.send_now("G92 E" + str(self.pauseE))

        # go back to relative if needed
        if self.pauseRelative:
            self.send_now("G91")
        if self.pauseRelativeE:
            self.send_now('M83')
        # reset old feed rate
        self.send_now("G1 F" + str(self.pauseF))

        self.paused = False
        self.printing = True
        self.print_thread = threading.Thread(target=self._print,
                                             name='print thread',
                                             kwargs={"resuming": True})
        self.print_thread.start()

    def send(self, command, wait=0):
        """Adds a command to the checksummed main command queue if printing, or
        sends the command immediately if not printing"""

        if self.online:
            if self.printing:
                self.mainqueue.append(command)
            else:
                self.priqueue.put_nowait(command)
        else:
            self.logError(_("Not connected to printer."))

    def send_now(self, command, wait=0):
        """Sends a command to the printer ahead of the command queue, without a
        checksum"""
        if self.online:
            self.priqueue.put_nowait(command)
        else:
            self.logError(_("Not connected to printer."))

    def _print(self, resuming=False):
        self._stop_sender()
        try:
            for handler in self.event_handler:
                try:
                    handler.on_start(resuming)
                except:
                    logging.error(traceback.format_exc())
            if self.startcb:
                # callback for printing started
                try:
                    self.startcb(resuming)
                except:
                    self.logError(
                        _("Print start callback failed with:") + "\n" +
                        traceback.format_exc())
            while self.printing and self.printer and self.online:
                self._sendnext()
            self.sentlines = {}
            self.log.clear()
            self.sent = []
            for handler in self.event_handler:
                try:
                    handler.on_end()
                except:
                    logging.error(traceback.format_exc())
            if self.endcb:
                # callback for printing done
                try:
                    self.endcb()
                except:
                    self.logError(
                        _("Print end callback failed with:") + "\n" +
                        traceback.format_exc())
        except:
            self.logError(
                _("Print thread died due to the following error:") + "\n" +
                traceback.format_exc())
        finally:
            self.print_thread = None
            self._start_sender()

    def process_host_command(self, command):
        """only ;@pause command is implemented as a host command in printcore, but hosts are free to reimplement this method"""
        command = command.lstrip()
        if command.startswith(";@pause"):
            self.pause()

    def _sendnext(self):
        if not self.printer:
            return
        while self.printer and self.printing and not self.clear:
            time.sleep(0.001)
        # Only wait for oks when using serial connections or when not using tcp
        # in streaming mode
        if not self.printer_tcp or not self.tcp_streaming_mode:
            self.clear = False
        if not (self.printing and self.printer and self.online):
            self.clear = True
            return
        if self.resendfrom < self.lineno and self.resendfrom > -1:
            self._send(self.sentlines[self.resendfrom], self.resendfrom, False)
            self.resendfrom += 1
            return
        self.resendfrom = -1
        if not self.priqueue.empty():
            self._send(self.priqueue.get_nowait())
            self.priqueue.task_done()
            return
        if self.printing and self.mainqueue.has_index(self.queueindex):
            (layer, line) = self.mainqueue.idxs(self.queueindex)
            gline = self.mainqueue.all_layers[layer][line]
            if self.queueindex > 0:
                (prev_layer,
                 prev_line) = self.mainqueue.idxs(self.queueindex - 1)
                if prev_layer != layer:
                    for handler in self.event_handler:
                        try:
                            handler.on_layerchange(layer)
                        except:
                            logging.error(traceback.format_exc())
            if self.layerchangecb and self.queueindex > 0:
                (prev_layer,
                 prev_line) = self.mainqueue.idxs(self.queueindex - 1)
                if prev_layer != layer:
                    try:
                        self.layerchangecb(layer)
                    except:
                        self.logError(traceback.format_exc())
            for handler in self.event_handler:
                try:
                    handler.on_preprintsend(gline, self.queueindex,
                                            self.mainqueue)
                except:
                    logging.error(traceback.format_exc())
            if self.preprintsendcb:
                if self.mainqueue.has_index(self.queueindex + 1):
                    (next_layer,
                     next_line) = self.mainqueue.idxs(self.queueindex + 1)
                    next_gline = self.mainqueue.all_layers[next_layer][
                        next_line]
                else:
                    next_gline = None
                gline = self.preprintsendcb(gline, next_gline)
            if gline is None:
                self.queueindex += 1
                self.clear = True
                return
            tline = gline.raw
            if tline.lstrip().startswith(";@"):  # check for host command
                self.process_host_command(tline)
                self.queueindex += 1
                self.clear = True
                return

            # Strip comments
            tline = gcoder.gcode_strip_comment_exp.sub("", tline).strip()
            if tline:
                self._send(tline, self.lineno, True)
                self.lineno += 1
                for handler in self.event_handler:
                    try:
                        handler.on_printsend(gline)
                    except:
                        logging.error(traceback.format_exc())
                if self.printsendcb:
                    try:
                        self.printsendcb(gline)
                    except:
                        self.logError(traceback.format_exc())
            else:
                self.clear = True
            self.queueindex += 1
        else:
            self.printing = False
            self.clear = True
            if not self.paused:
                self.queueindex = 0
                self.lineno = 0
                self._send("M110", -1, True)

    def _send(self, command, lineno=0, calcchecksum=False):
        # Only add checksums if over serial (tcp does the flow control itself)
        if calcchecksum and not self.printer_tcp:
            prefix = "N" + str(lineno) + " " + command
            command = prefix + "*" + str(self._checksum(prefix))
            if "M110" not in command:
                self.sentlines[lineno] = command
        if self.printer:
            self.sent.append(command)
            # run the command through the analyzer
            gline = None
            try:
                gline = self.analyzer.append(command, store=False)
            except:
                logging.warning(
                    _("Could not analyze command %s:") % command + "\n" +
                    traceback.format_exc())
            if self.loud:
                logging.info("SENT: %s" % command)

            for handler in self.event_handler:
                try:
                    handler.on_send(command, gline)
                except:
                    logging.error(traceback.format_exc())
            if self.sendcb:
                try:
                    self.sendcb(command, gline)
                except:
                    self.logError(traceback.format_exc())
            try:
                self.printer.write((command + "\n").encode('ascii'))
                if self.printer_tcp:
                    try:
                        self.printer.flush()
                    except socket.timeout:
                        pass
                self.writefailures = 0
            except socket.error as e:
                if e.errno is None:
                    self.logError(
                        _("Can't write to printer (disconnected ?):") + "\n" +
                        traceback.format_exc())
                else:
                    self.logError(
                        _("Can't write to printer (disconnected?) (Socket error {0}): {1}"
                          ).format(e.errno, decode_utf8(e.strerror)))
                self.writefailures += 1
            except SerialException as e:
                self.logError(
                    _("Can't write to printer (disconnected?) (SerialException): {0}"
                      ).format(decode_utf8(str(e))))
                self.writefailures += 1
            except RuntimeError as e:
                self.logError(
                    _("Socket connection broken, disconnected. ({0}): {1}").
                    format(e.errno, decode_utf8(e.strerror)))
                self.writefailures += 1
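The send()/send_now() pair above routes traffic between two queues: print lines go to mainqueue while ad-hoc commands jump ahead through priqueue, and _sendnext() always drains priqueue before the next print line. A minimal, self-contained sketch of that dispatch (the class and names below are illustrative only, not part of printcore):

from queue import Queue, Empty

class TwoQueueSender:
    def __init__(self):
        self.printing = False
        self.mainqueue = []        # print lines, consumed in order by the print loop
        self.priqueue = Queue()    # ad-hoc commands, drained before the next print line

    def send(self, command):
        # behind the current print when printing, immediately otherwise
        if self.printing:
            self.mainqueue.append(command)
        else:
            self.priqueue.put_nowait(command)

    def send_now(self, command):
        # always jumps ahead of the print queue
        self.priqueue.put_nowait(command)

    def _sendnext(self):
        # priority commands win over the next print line
        try:
            return self.priqueue.get_nowait()
        except Empty:
            return self.mainqueue.pop(0) if self.mainqueue else None

sender = TwoQueueSender()
sender.printing = True
sender.send("G1 X10")      # queued behind the print
sender.send_now("M105")    # jumps ahead
print(sender._sendnext())  # M105
print(sender._sendnext())  # G1 X10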
Example #45
0
def copy_events(
    src_events: List[Event],
    src_id_dst_event: Dict[int, Event],
    src_dst_ids_assets: Dict[int, int],
    project_src: str,
    runtime: int,
    client: CogniteClient,
    src_filter: List[Event],
    jobs: queue.Queue = None,
):
    """
    Creates/updates event objects and then attempts to create and update these objects in the destination.

    Args:
        src_events: A list of the events that are in the source.
        src_id_dst_event: A dictionary mapping an event's source id to its matching destination object.
        src_dst_ids_assets: A dictionary of all the mappings of source asset id to destination asset id.
        project_src: The name of the project the object is being replicated from.
        runtime: The timestamp to be used in the new replicated metadata.
        client: The client corresponding to the destination project.
        src_filter: List of events in the destination - will be used for comparison if the current events were not copied by the replicator.
        jobs: An optional queue of (start, end) index ranges; when given, events are replicated chunk by chunk until the queue is empty.
    """

    if jobs:
        use_queue_logic = True
        do_while = not jobs.empty()
    else:
        use_queue_logic = False
        do_while = True

    while do_while:
        if use_queue_logic:
            chunk = jobs.get()
            chunk_events = src_events[chunk[0] : chunk[1]]
        else:
            chunk_events = src_events

        logging.debug(f"Starting to replicate {len(chunk_events)} events.")

        create_events, update_events, unchanged_events = replication.make_objects_batch(
            chunk_events,
            src_id_dst_event,
            src_dst_ids_assets,
            create_event,
            update_event,
            project_src,
            runtime,
            src_filter=src_filter,
        )

        logging.info(f"Creating {len(create_events)} new events and updating {len(update_events)} existing events.")

        if create_events:
            logging.debug(f"Attempting to create {len(create_events)} events.")
            create_events = replication.retry(client.events.create, create_events)
            logging.debug(f"Successfully created {len(create_events)} events.")

        if update_events:
            logging.debug(f"Attempting to update {len(update_events)} events.")
            update_events = replication.retry(client.events.update, update_events)
            logging.debug(f"Successfully updated {len(update_events)} events.")

        logging.info(f"Created {len(create_events)} new events and updated {len(update_events)} existing events.")

        if use_queue_logic:
            jobs.task_done()
            do_while = not jobs.empty()
        else:
            do_while = False
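When copy_events is given a jobs queue it pulls (start, end) index ranges until the queue is empty instead of copying everything in one pass. A rough sketch of how such a queue might be filled and drained by worker threads; run_chunked and copy_fn are assumptions, not part of the replicator (copy_fn stands for copy_events with every argument except jobs already bound, e.g. via functools.partial):

import queue
import threading

def run_chunked(src_events, copy_fn, num_threads=4, chunk_size=1000):
    # split the source events into (start, end) index chunks
    jobs = queue.Queue()
    for start in range(0, len(src_events), chunk_size):
        jobs.put((start, min(start + chunk_size, len(src_events))))

    # each worker repeatedly pulls a chunk and replicates it
    workers = [threading.Thread(target=copy_fn, kwargs={'jobs': jobs}, daemon=True)
               for _ in range(num_threads)]
    for w in workers:
        w.start()
    jobs.join()   # returns once every (start, end) chunk has been task_done()'d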
Example #46
0
class ThumbnailMakerService(object):
    def __init__(self, home_dir='.'):
        self.home_dir = home_dir
        self.input_dir = self.home_dir + os.path.sep + 'incoming'
        self.output_dir = self.home_dir + os.path.sep + 'outgoing'
        self.abs_inp_dir = os.getcwd() + os.path.sep + 'incoming'
        self.abs_oup_dir = os.getcwd() + os.path.sep + 'outgoing'
        self.img_queue = Queue()

    def download_images(self, img_url_list):
        # validate inputs
        if not img_url_list:
            return
        os.makedirs(self.input_dir, exist_ok=True)

        logging.info("beginning image downloads")

        start = time.perf_counter()
        os.chdir(self.abs_inp_dir)
        for url in img_url_list:
            # download each image and save to the input dir
            img_filename = str(url).split('/')[-1]  # URLs always use '/', not os.path.sep
            r = requests.get(url)
            with open(img_filename, 'wb') as f:
                f.write(r.content)
                self.img_queue.put(img_filename)

        end = time.perf_counter()
        # Poison pill: signal to the consumer that nothing more will be produced on the queue
        self.img_queue.put(None)

        logging.info("downloaded {} images in {} seconds".format(
            len(img_url_list), end - start))

    def perform_resizing(self):
        # validate inputs

        os.makedirs(self.output_dir, exist_ok=True)

        logging.info("beginning image resizing")
        target_sizes = [32, 64, 200]
        num_images = len(os.listdir(self.abs_inp_dir))

        start = time.perf_counter()
        while True:
            filename = self.img_queue.get()
            if filename:
                logging.info('Resizing image -- {}'.format(filename))
                orig_img = Image.open(self.abs_inp_dir + os.path.sep +
                                      filename)
                for basewidth in target_sizes:
                    img = orig_img
                    # calculate target height of the resized image to maintain the aspect ratio
                    wpercent = (basewidth / float(img.size[0]))
                    hsize = int((float(img.size[1]) * float(wpercent)))
                    # perform resizing
                    img = img.resize((basewidth, hsize), PIL.Image.LANCZOS)

                    # save the resized image to the output dir with a modified file name
                    new_filename = os.path.splitext(filename)[0] + \
                        '_' + str(basewidth) + os.path.splitext(filename)[1]

                    img.save(self.abs_oup_dir + os.path.sep + new_filename)

                os.remove(self.abs_inp_dir + os.path.sep + filename)
                logging.info('Done resizing image -- {}'.format(filename))
                self.img_queue.task_done()
            else:
                self.img_queue.task_done()
                break

        end = time.perf_counter()

        logging.info("created {} thumbnails in {} seconds".format(
            num_images, end - start))

    def make_thumbnails(self, img_url_list):
        logging.info("START make_thumbnails")
        start = time.perf_counter()

        t1 = Thread(target=self.download_images, args=([img_url_list]))
        t2 = Thread(target=self.perform_resizing)
        t1.start()
        t2.start()
        t1.join()
        t2.join()

        end = time.perf_counter()
        logging.info("END make_thumbnails in {} seconds".format(end - start))
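download_images and perform_resizing above form a classic producer/consumer pair: the producer pushes filenames and finally a None poison pill, and the consumer exits when it sees the pill. A stripped-down sketch of that hand-off with placeholder names:

from queue import Queue
from threading import Thread

q = Queue()

def producer(items):
    for item in items:
        q.put(item)
    q.put(None)                  # poison pill: nothing more will be produced

def consumer():
    while True:
        item = q.get()
        if item is None:         # pill received: acknowledge it and exit
            q.task_done()
            break
        print("processing", item)
        q.task_done()

t1 = Thread(target=producer, args=(["a.jpg", "b.jpg"],))
t2 = Thread(target=consumer)
t1.start()
t2.start()
t1.join()
t2.join()
Example #47
0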
class AsynchronousLoader(object):
    """
    Class for asynchronously loading from CPU memory to device memory with DataLoader.

    Note that this only works for single GPU training, multiGPU uses PyTorch's DataParallel or
    DistributedDataParallel which uses its own code for transferring data across GPUs. This could just
    break or make things slower with DataParallel or DistributedDataParallel.

    Args:
        data: The PyTorch Dataset or DataLoader we're using to load.
        device: The PyTorch device we are loading to
        q_size: Size of the queue used to store the data loaded to the device
        num_batches: Number of batches to load. This must be set if the dataloader
            doesn't have a finite __len__. It will also override DataLoader.__len__
            if set and DataLoader has a __len__. Otherwise it can be left as None
        **kwargs: Any additional arguments to pass to the dataloader if we're
            constructing one here
    """
    def __init__(self,
                 data,
                 device=torch.device('cuda', 0),
                 q_size=10,
                 num_batches=None,
                 **kwargs):
        if isinstance(data, torch.utils.data.DataLoader):
            self.dataloader = data
        else:
            self.dataloader = DataLoader(data, **kwargs)

        if num_batches is not None:
            self.num_batches = num_batches
        elif hasattr(self.dataloader, '__len__'):
            self.num_batches = len(self.dataloader)
        else:
            raise Exception(
                "num_batches must be specified or data must have finite __len__"
            )

        self.device = device
        self.q_size = q_size

        self.load_stream = torch.cuda.Stream(device=device)
        self.queue = Queue(maxsize=self.q_size)

        self.idx = 0

    # The loop that will load into the queue in the background
    def load_loop(self):
        for i, sample in enumerate(self.dataloader):
            self.queue.put(self.load_instance(sample))
            if i == len(self):
                break

    # Recursive loading for each instance based on torch.utils.data.default_collate
    def load_instance(self, sample):
        if torch.is_tensor(sample):
            with torch.cuda.stream(self.load_stream):
                # Can only do asynchronous transfer if we use pin_memory
                if not sample.is_pinned():
                    sample = sample.pin_memory()
                return sample.to(self.device, non_blocking=True)
        else:
            return [self.load_instance(s) for s in sample]

    def __iter__(self):
        # We don't want to run the thread more than once
        # Start a new thread if we are at the beginning of a new epoch, and our current worker is dead
        if (not hasattr(self, 'worker') or not self.worker.is_alive()
            ) and self.queue.empty() and self.idx == 0:
            self.worker = Thread(target=self.load_loop)
            self.worker.daemon = True
            self.worker.start()
        return self

    def __next__(self):
        # If we've reached the number of batches to return
        # or the queue is empty and the worker is dead then exit
        done = not self.worker.is_alive() and self.queue.empty()
        done = done or self.idx >= len(self)
        if done:
            self.idx = 0
            self.queue.join()
            self.worker.join()
            raise StopIteration
        # Otherwise return the next batch
        out = self.queue.get()
        self.queue.task_done()
        self.idx += 1
        return out

    def __len__(self):
        return self.num_batches
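A hypothetical way to use the AsynchronousLoader above (it requires a CUDA device; the dataset and the DataLoader keyword arguments below are placeholders forwarded through **kwargs):

import torch
from torch.utils.data import TensorDataset

# toy dataset of 256 random "images" and labels
dataset = TensorDataset(torch.randn(256, 3, 32, 32),
                        torch.randint(0, 10, (256,)))
loader = AsynchronousLoader(dataset, device=torch.device('cuda', 0),
                            q_size=10, batch_size=32, shuffle=True)

for x, y in loader:
    # each batch is already on the GPU when it comes off the queue
    assert x.is_cuda and y.is_cuda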
Example #48
0
class Plotter:
    """Renders rollouts of the policy as it trains.

    Usually, this class is used by passing plot=True to LocalRunner.train().

    Args:
        env (gym.Env): The environment to perform rollouts in. This will be
            used without copying in the current process but in a separate
            thread, so it should be given a unique copy (in particular, do not
            pass the environment here, then try to pickle it, or you will
            occasionally get crashes).
        policy (garage.tf.Policy): The policy to do the rollouts with.
        sess (tf.Session): The TensorFlow session to use.
        graph (tf.Graph): The TensorFlow graph to use.
        rollout (callable): The rollout function to call.

    """

    # List containing all plotters instantiated in the process
    __plotters = []

    def __init__(self,
                 env,
                 policy,
                 sess=None,
                 graph=None,
                 rollout=default_rollout):
        Plotter.__plotters.append(self)
        self._env = env
        self.sess = tf.compat.v1.Session() if sess is None else sess
        self.graph = tf.compat.v1.get_default_graph(
        ) if graph is None else graph
        with self.sess.as_default(), self.graph.as_default():
            self._policy = policy.clone('plotter_policy')
        self.rollout = rollout
        self.worker_thread = Thread(target=self._start_worker, daemon=True)
        self.queue = Queue()

        # Needed in order to draw glfw window on the main thread
        if 'Darwin' in platform.platform():
            self.rollout(self._env,
                         self._policy,
                         max_episode_length=np.inf,
                         animated=True,
                         speedup=5)

    def _start_worker(self):
        max_length = None
        initial_rollout = True
        try:
            with self.sess.as_default(), self.sess.graph.as_default():
                # Each iteration will process ALL messages currently in the
                # queue
                while True:
                    msgs = {}
                    # If true, block and yield processor
                    if initial_rollout:
                        msg = self.queue.get()
                        msgs[msg.op] = msg
                        # Only fetch the last message of each type
                        while not self.queue.empty():
                            msg = self.queue.get()
                            msgs[msg.op] = msg
                    else:
                        # Only fetch the last message of each type
                        while not self.queue.empty():
                            msg = self.queue.get_nowait()
                            msgs[msg.op] = msg

                    if Op.STOP in msgs:
                        self.queue.task_done()
                        break
                    if Op.UPDATE in msgs:
                        self._env, self._policy = msgs[Op.UPDATE].args
                        self.queue.task_done()
                    if Op.DEMO in msgs:
                        param_values, max_length = msgs[Op.DEMO].args
                        self._policy.set_param_values(param_values)
                        initial_rollout = False
                        self.rollout(self._env,
                                     self._policy,
                                     max_episode_length=max_length,
                                     animated=True,
                                     speedup=5)
                        self.queue.task_done()
                    else:
                        if max_length:
                            self.rollout(self._env,
                                         self._policy,
                                         max_episode_length=max_length,
                                         animated=True,
                                         speedup=5)
        except KeyboardInterrupt:
            pass

    def close(self):
        """Stop the Plotter's worker thread."""
        if self.worker_thread.is_alive():
            while not self.queue.empty():
                self.queue.get()
                self.queue.task_done()
            self.queue.put(Message(op=Op.STOP, args=None, kwargs=None))
            self.queue.join()
            self.worker_thread.join()

    @staticmethod
    def get_plotters():
        """Return all garage.tf.Plotter's.

        Returns:
            list[garage.tf.Plotter]: All the garage.tf.Plotter's

        """
        return Plotter.__plotters

    def start(self):
        """Start the Plotter's worker thread."""
        if not self.worker_thread.is_alive():
            tf.compat.v1.get_variable_scope().reuse_variables()
            self.worker_thread.start()
            self.queue.put(
                Message(op=Op.UPDATE,
                        args=(self._env, self._policy),
                        kwargs=None))
            atexit.register(self.close)

    def update_plot(self, policy, max_length=np.inf):
        """Update the policy being plotted.

        Args:
            policy (garage.tf.Policy): The policy to rollout.
            max_length (int or float): The maximum length to allow a rollout to
                be. Defaults to infinity.

        """
        if self.worker_thread.is_alive():
            self.queue.put(
                Message(op=Op.DEMO,
                        args=(policy.get_param_values(), max_length),
                        kwargs=None))
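The worker loop in _start_worker drains the queue and keeps only the latest message of each operation, so a newer DEMO supersedes an older one. A minimal sketch of that last-message-wins drain; Message and Op here are simplified stand-ins for the types used by garage:

from collections import namedtuple
from enum import Enum
from queue import Queue

Message = namedtuple('Message', ['op', 'args'])
Op = Enum('Op', ['STOP', 'UPDATE', 'DEMO'])

q = Queue()
q.put(Message(Op.DEMO, args=('params-v1', 100)))
q.put(Message(Op.DEMO, args=('params-v2', 100)))   # supersedes the first DEMO
q.put(Message(Op.UPDATE, args=('env', 'policy')))

msgs = {}
while not q.empty():
    msg = q.get_nowait()
    msgs[msg.op] = msg      # later messages of the same op overwrite earlier ones
    q.task_done()

print(msgs[Op.DEMO].args)   # ('params-v2', 100)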
Example #49
0
class OSXCmd(cmd.Cmd, LoggerObject):
    def __init__(self, history_size=10):
        # both cmd.Cmd, LoggerObject need to be init.
        cmd.Cmd.__init__(self)
        LoggerObject.__init__(self)

        self.cmdqueue = Queue()
        self.history_size = history_size

    def registerKeyboardInterrupt(self):
        stdin = NSFileHandle.fileHandleWithStandardInput().retain()

        handle = objc.selector(self.keyboardHandler_, signature='v@:@')
        NSNotificationCenter.defaultCenter().addObserver_selector_name_object_(
            self, handle, NSFileHandleReadCompletionNotification, stdin)

        stdin.readInBackgroundAndNotify()

    def unregisterKeyboardInterrupt(self):
        NSNotificationCenter.defaultCenter().removeObserver_(self)

    def keyboardHandler_(self, notification):
        data = notification.userInfo().objectForKey_(
            NSFileHandleNotificationDataItem)
        line = NSString.alloc().initWithData_encoding_(
            data, NSUTF8StringEncoding).autorelease()

        if len(line):
            self.cmdqueue.put(line)

        stdin = NSFileHandle.fileHandleWithStandardInput().retain()
        stdin.readInBackgroundAndNotify()

    def cmdloop(self, intro=None):
        # customized for python & OSX co-existence
        # use OSX framework to read input from keyboard interrupt
        self.preloop()
        if intro is not None:
            self.intro = intro
        if self.intro:
            self.stdout.write(str(self.intro) + "\n")
        # the main loop
        stop = None
        showPrompt = True
        while not stop:
            if showPrompt:
                self.stdout.write(self.prompt)
                self.stdout.flush()
                showPrompt = False
            try:
                NSRunLoop.currentRunLoop().runMode_beforeDate_(
                    NSDefaultRunLoopMode, NSDate.distantPast())
                line = self.cmdqueue.get_nowait()
                if not len(line):
                    line = "EOF"
                else:
                    line = line.strip('\r\n')
                line = self.precmd(line)
                stop = self.onecmd(line)
                stop = self.postcmd(stop, line)
                self.cmdqueue.task_done()
                showPrompt = True
            except Empty:
                continue
            except KeyboardInterrupt:
                break
            except Exception as e:
                import traceback
                print(traceback.format_exc())
                break
        # cleanup
        self.postloop()

    def preloop(self):
        # cmd history
        self._history = []
        # OSX
        self.osx_pool = NSAutoreleasePool.alloc().init()
        self.registerKeyboardInterrupt()

    def postloop(self):
        self.unregisterKeyboardInterrupt()
        del self.osx_pool

    def endloop(self):
        self.cmdqueue.put("exit")

    def precmd(self, line):
        self._history += [line.strip()]
        if len(self._history) > self.history_size:
            self._history = self._history[-(self.history_size):]
        self.unregisterKeyboardInterrupt()
        return line

    def postcmd(self, stop, line):
        try:
            self.stdout.flush()
        except:
            pass
        self.registerKeyboardInterrupt()
        return stop

    def emptyline(self):
        pass

    def do_shell(self, args):
        """Execute shell command
        """
        os.system(args)

    def do_debug(self, args):
        """Enable/disable debugging information
        """
        if not hasattr(self, 'debug'):
            return
        option = args.strip()
        if option == "":
            pass
        elif option == "True":
            self.debug = True
        elif option == "False":
            self.debug = False
        else:
            self.stdout.write("Only accept True/False\n")
        ans = "%s is %sin debug mode.\n"
        cls_name = self.__class__.__name__
        if self.debug:
            ans = ans % (cls_name, "")
        else:
            ans = ans % (cls_name, "not ")
        self.stdout.write(ans)
        self.stdout.flush()

    def default(self, line):
        if len(line.strip()):
            self.do_eval(line)

    def do_eval(self, args):
        """Evaluate a single line python statement
        """
        line = args.strip()
        if len(line) == 0:
            return
        output = ""
        oldstdout = self.stdout
        from StringIO import StringIO
        import ast
        buffer = StringIO()
        self.stdout = buffer
        try:
            code = compile(line, "<string>", "single")
            exec(code)
        except NameError as e:
            self.logger.debug(e)
            cmd, args, line = self.parseline(line)
            self.commandNotFound(cmd)
        except SyntaxError as e:
            self.logger.debug(e)
            cmd, args, line = self.parseline(line)
            self.commandNotFound(cmd)
        except Exception as e:
            self.logger.debug(e)
            self.stdout.write(pformat(e) + "\n")
        finally:
            self.stdout = oldstdout
            self.stdout.write(buffer.getvalue())

    def commandNotFound(self, cmd):
        self.stdout.write("Command: '%s' is not yet supported by %s\n" %
                          (cmd, self.__class__.__name__))

    def do_hist(self, args):
        """Show last N command history
        """
        length = len(self._history)
        try:
            length = int(args.strip())
        except:
            pass
        self._history.pop()
        for cmd in self._history[-length:]:
            self.stdout.write(cmd)
            self.stdout.write('\n')
        self.stdout.flush()

    def do_exit(self, args):
        """Exit
        """
        return True
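cmdloop above pumps the Cocoa run loop and then polls the command queue with get_nowait, simply looping again on Empty. A reduced, Cocoa-free sketch of that polling pattern with a prefilled queue:

from queue import Queue, Empty

cmdqueue = Queue()
cmdqueue.put("hello\n")
cmdqueue.put("exit\n")

stop = False
while not stop:
    # (the real loop pumps the Cocoa run loop here before polling)
    try:
        line = cmdqueue.get_nowait()
    except Empty:
        continue
    line = line.strip('\r\n')
    print("command:", line)
    stop = (line == "exit")
    cmdqueue.task_done()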
Example #50
0
class JSONDumper:
    def __init__(self):
        self.outfile = None
        self.transformations = None
        self.encode = None
        self.url = None
        self.lock = None
        self.auth = None
        self.queue = Queue()
        self.configfile = "config.json"
        self.ip_pool = json.load(open(self.configfile))
        constant.HOME = os.getcwd()

    def done(self):
        self.queue.join()
        if self.outfile:
            self.outfile.close()

    fields = {
        'timestamp': (
            ('error', 'timestamp'),
            ('request', 'timestamp_start'),
            ('request', 'timestamp_end'),
            ('response', 'timestamp_start'),
            ('response', 'timestamp_end'),
            ('client_conn', 'timestamp_start'),
            ('client_conn', 'timestamp_end'),
            ('client_conn', 'timestamp_tls_setup'),
            ('server_conn', 'timestamp_start'),
            ('server_conn', 'timestamp_end'),
            ('server_conn', 'timestamp_tls_setup'),
            ('server_conn', 'timestamp_tcp_setup'),
        ),
        'ip': (
            ('server_conn', 'source_address'),
            ('server_conn', 'ip_address'),
            ('server_conn', 'address'),
            ('client_conn', 'address'),
        ),
        'ws_messages': (('messages', ), ),
        'headers': (
            ('request', 'headers'),
            ('response', 'headers'),
        ),
        'content': (
            ('request', 'content'),
            ('response', 'content'),
        ),
    }

    def _init_transformations(self):
        """
            For fields data processing
        """
        self.transformations = [{
            'fields': self.fields['headers'],
            'func': dict,
        }, {
            'fields': self.fields['timestamp'],
            'func': lambda t: int(t * 1000),
        }, {
            'fields': self.fields['ip'],
            'func': lambda addr: {
                'host': addr[0].replace('::ffff:', ''),
                'port': addr[1],
            },
        }, {
            'fields':
            self.fields['ws_messages'],
            'func':
            lambda ms: [{
                'type':
                m[0],
                'from_client':
                m[1],
                'content':
                base64.b64encode(bytes(m[2], 'utf-8'))
                if self.encode else m[2],
                'timestamp':
                int(m[3] * 1000),
            } for m in ms],
        }]

        if self.encode:
            self.transformations.append({
                'fields': self.fields['content'],
                'func': base64.b64encode,
            })

    @staticmethod
    def transform_field(obj, path, func):
        """
            :param obj: frame
            :param path: tuple, e.g. ('error', 'timestamp')
            :param func: function to operate
            Apply a transformation function `func` to a value under the specified `path` in the `obj` dictionary.
        """
        for key in path[:-1]:  # walk every key of the path except the last, e.g. 'error'
            if not (key in obj and obj[key]):
                return
            obj = obj[key]  # step one level deeper into the nested fields
        if path[-1] in obj and obj[path[-1]]:
            # path[-1] (e.g. 'source_address'), if exists, call the function and do the transformations
            obj[path[-1]] = func(obj[path[-1]])

    @classmethod
    def convert_to_strings(cls, obj):
        """
            :param obj: dict/list/bytes 
            Recursively convert all list/dict elements of type `bytes` into strings.
        """
        if isinstance(obj, dict):  # if the argument is a dictionary
            return {
                cls.convert_to_strings(key): cls.convert_to_strings(value)
                for key, value in obj.items()
            }
        elif isinstance(obj, list) or isinstance(
                obj, tuple):  # if the argument is a list or a tuple
            return [cls.convert_to_strings(element) for element in obj]
        elif isinstance(obj, bytes):  # if the argument is bytes
            return str(obj)[2:-1]
        return obj

    def worker(self):
        """
            framework to write
        """
        while True:
            frame = self.queue.get()
            self.dump(frame)
            self.queue.task_done()

    def dump(self, frame):
        """
            :param frame: frame
            Transform and dump (write / send) a data frame.
        """
        for tfm in self.transformations:
            for field in tfm['fields']:
                self.transform_field(frame, field, tfm['func'])
        frame = self.convert_to_strings(frame)
        # to get the ip address of the client
        ip_address = frame['client_conn']["address"]['host']
        # to get the request & response
        request_frame = frame['request']
        response_frame = frame['response']
        # to get content of request & response respectively
        request_content = request_frame['content']
        request_content_obj = {'content': request_content}
        response_content = response_frame['content']
        response_content_obj = {'content': response_content}

        # set flag to see whether to write request content or not
        rtc_flag = (len(request_content.strip()) != 0)
        # set flag to see whether to write response content or not
        rec_flag = (len(response_content.strip()) != 0)

        # clear the content, since it can be very large, so we store it in another single file
        request_frame['content'] = ""
        response_frame['content'] = ""

        # set the first directory and second directory and file suffix
        first_subfile = 'others'
        second_subfile = 'others'
        file_suffix = ''

        # classification according to each frame's response headers' content-type
        try:
            type_name = response_frame['headers']['Content-Type'].split(
                ";")[0].replace(" ", "")
            first_subfile = type_name.split('/')[0].replace(' ', '')
            second_subfile = type_name.split('/')[1].replace(' ', '')
        except KeyError:
            try:
                type_name = response_frame['headers']['content-type'].split(
                    ";")[0].replace(" ", "")
                first_subfile = type_name.split('/')[0].replace(' ', '')
                second_subfile = type_name.split('/')[1].replace(' ', '')
            except KeyError:
                type_name = None

        # if response_frame['headers'].__contains__('Content-Type') or response_frame['headers'].__contains__('content-type'):
        #     if response_frame['headers'].__contains__('Content-Type'):
        #         name = 'Content-Type'
        #     else:
        #         name = 'content-type'
        #     type_name = response_frame['headers'][name].split(';')[
        #         0].replace(' ', '')
        #     first_subfile = type_name.split('/')[0].replace(' ', '')
        #     second_subfile = type_name.split('/')[1].replace(' ', '')
        if type_name:
            try:
                file_suffix = constant.SUFFIX[type_name][0]
            except KeyError:
                file_suffix = ".bin"
        else:
            return
            # if constant.SUFFIX.__contains__(type_name):
            #     file_suffix = constant.SUFFIX[type_name][0]

            # classification = response_frame['headers'][name].split('/')
            # first_subfile = classification[0].replace(' ', '')
            # second_subfile = classification[1].split(';')[0]
        self.lock.acquire()

        self.step_into_subfile(ip_address, first_subfile, second_subfile)
        # assign the id for each ip address
        try:
            session_id = self.ip_pool[ip_address]
        except KeyError:
            session_id = 1
            self.ip_pool[ip_address] = 1
        request_filename = str(session_id) + '_request.json'
        response_filename = str(session_id) + '_response.json'
        """
            to write out the request header information
        """
        self.write_into_file(request_filename, request_frame)
        """
            To write out the response header information
        """
        self.write_into_file(response_filename, response_frame)
        """
            To write out the request content if any
        """
        # if rtc_flag:
        #     rtc_filename = str(session_id)+'_request_content.json'
        #     self.write_into_file(rtc_filename,request_content_obj)
        """
            To write out the response content into right format file
        """
        if rec_flag:
            """
                here for debugging
            """
            # rec_file = str(session_id)+'_response_content.json'
            # self.write_into_file(rec_file,response_content_obj)
            rec_filename = str(session_id) + '_response_content'
            # TODO: transfer string to bytes
            if response_content[0:8] == constant.GZIP_HEADER:
                try:
                    self.str2byte_gzip(file_suffix, rec_filename,
                                       response_content)
                except Exception:
                    pass
            else:
                self.str2byte_nogzip(file_suffix, rec_filename,
                                     response_content)

        # back to the root directory of the project
        os.chdir(constant.HOME)
        # Record the next session id for this ip address
        self.ip_pool[ip_address] += 1
        # self.write_into_file(self.configfile, self.ip_pool, 'w+')
        open(self.configfile, 'w+').write(json.dumps(self.ip_pool))
        self.lock.release()

    @classmethod
    def str2byte_nogzip(cls, suffix, filename, string):
        """
            :param suffix: suffix of the file
            :param filename: str
            :param string: str that contains bytes information
            transfer string to bytes, and store it in the right file
        """
        i = 0
        with open(filename + suffix, 'wb+') as fstb:
            while i < len(string):
                if string[i] == "\\":
                    if string[i + 1] == 'x':
                        number = int(string[i + 2:i + 4], 16)
                        fstb.write(number.to_bytes(1, 'little'))
                        i += 4
                    elif constant.DICTION.__contains__(string[i + 1]):
                        number = constant.DICTION[string[i + 1]]
                        fstb.write(number.to_bytes(1, 'little'))
                        i += 2
                    else:
                        fstb.write(bytes(string[i], 'utf-8'))
                        i += 2
                else:
                    fstb.write(bytes(string[i], 'utf-8'))
                    i += 1

    @classmethod
    def str2byte_gzip(cls, suffix, filename, string):
        bytes_string = b''
        i = 0
        with open(filename + suffix, 'w+', encoding='utf-8') as f:
            while i < len(string):
                if string[i] == "\\":
                    if string[i + 1] == "x":
                        number = int(string[i + 2:i + 4], 16)
                        bytes_string += number.to_bytes(1, 'little')
                        i += 4
                    elif constant.DICTION.__contains__(string[i + 1]):
                        number = constant.DICTION[string[i + 1]]
                        bytes_string += number.to_bytes(1, 'little')
                        i += 2
                    else:
                        bytes_string += bytes(string[i], 'utf-8')
                        i += 2
                else:
                    bytes_string += bytes(string[i], 'utf-8')
                    i += 1
            tmp = BytesIO(bytes_string)
            fm = gzip.GzipFile(fileobj=tmp)
            content = fm.read().decode('utf-8')
            f.write(content)

    @staticmethod
    def load(loader):
        """
            Extra options to be specified in `~/.mitmproxy/config.yaml`.
        """
        loader.add_option('dump_encodecontent', bool, False,
                          'Encode content as base64.')
        loader.add_option('dump_destination', str, 'jsondump.out',
                          'Output destination: path to a file or URL.')
        loader.add_option('dump_username', str, '',
                          'Basic auth username for URL destinations.')
        loader.add_option('dump_password', str, '',
                          'Basic auth password for URL destinations.')

    def configure(self, _):
        """
            Determine the destination type and path, initialize the output
            transformation rules.
        """
        self.encode = ctx.options.dump_encodecontent
        self.lock = Lock()
        ctx.log.info('Writing all data frames to %s' %
                     ctx.options.dump_destination)
        self._init_transformations()
        t = Thread(target=self.worker)
        t.daemon = True
        t.start()

    def response(self, flow):
        """
            Dump request/response pairs.
        """
        self.queue.put(flow.get_state())

    def step_into_subfile(self, ip_address, first_subfile, second_subfile):
        try:
            os.chdir(ip_address)
        except FileNotFoundError:
            os.makedirs(ip_address)
            os.chdir(ip_address)

        # to step into first subfile
        try:
            os.chdir(first_subfile)
        except FileNotFoundError:
            os.makedirs(first_subfile)
            os.chdir(first_subfile)
        # to step into second subfile
        if first_subfile != 'others':
            try:
                os.chdir(second_subfile)
            except FileNotFoundError:
                os.makedirs(second_subfile)
                os.chdir(second_subfile)

    def write_into_file(self,
                        filename: str,
                        store_object: object,
                        openmode='a'):
        self.outfile = open(filename, openmode)
        self.outfile.write("\n" + json.dumps(store_object) + "\n")
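transform_field walks a nested dictionary along a path and applies a function to the leaf value only if every key on the way exists and is truthy. A small, hypothetical illustration using the JSONDumper class above and the same ip transformation:

frame = {'server_conn': {'source_address': ('::ffff:10.0.0.5', 443)}}

JSONDumper.transform_field(
    frame,
    ('server_conn', 'source_address'),
    lambda addr: {'host': addr[0].replace('::ffff:', ''), 'port': addr[1]},
)
print(frame)
# {'server_conn': {'source_address': {'host': '10.0.0.5', 'port': 443}}}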
Example #51
0
class Classify(object):
    """Classify images with TensorFlow and the Coral Edge TPU (threaded)"""
    def __init__(self):

        logger.info("Initialising classifier...")

        self.library = classifiers.get_classifiers()
        self.loaded = {}
        self.active = []

        self.quit_event = Event()
        self.file_queue = Queue()
        self.database = {}

    def _worker(self):

        logger.debug("Initialising classification worker")

        while True:
            try:  # Timeout raises queue.Empty

                image = self.file_queue.get(block=True, timeout=0.1)

            except Empty:
                if self.quit_event.is_set():
                    logger.debug("Quitting thread...")
                    break

            else:
                image = Image.open(image)

                library = self.library
                active = self.active
                database = self.database

                # Iterate over all classifiers
                for name in library:

                    # Only classify active classifiers
                    if name in active:

                        # Ensure classifier is in database
                        try:
                            storage = database[name]
                        except KeyError:
                            storage = {}

                        # Load classifier information
                        engine = self.loaded[name]["model"]
                        labels = self.loaded[name]["labels"]
                        thresholds = self.loaded[name]["thresholds"]

                        # Run inference
                        logger.debug("Starting classifier %s " % (name))

                        try:
                            results = engine.classify_with_image(
                                image, top_k=3,
                                threshold=0)  # Return top 3 probability items
                            logger.debug("%s results: " % (results))
                        except OSError:
                            logger.info("OSError detected, retrying")
                            break

                        # Create dictionary including those not in top_k
                        big_dict = {}
                        for result in results:
                            label = labels[result[0]]
                            confidence = round(result[1].item(), 2)
                            big_dict[label] = confidence

                        not_in_top_k = big_dict.keys() ^ labels.values()
                        for label in not_in_top_k:
                            # Zero confidence ensures moving average keeps moving
                            big_dict[label] = 0

                        # Iterate over the dictionary
                        for label, confidence in big_dict.items():

                            # Ensure label is in classifier storage entry
                            if label not in storage:
                                storage[label] = {}
                                storage[label]["queue"] = [0] * 5

                            # Update nested storage dictionary
                            this_label = storage[label]
                            this_label["confidence"] = confidence

                            # Use deque to update moving average
                            queue = deque(this_label["queue"])
                            queue.append(confidence)
                            queue.popleft()
                            this_label["queue"] = list(queue)
                            average = round(sum(queue) / 5, 2)
                            this_label["average"] = average

                            # Use threshold storage to check whether it exceeds
                            this_label["threshold"] = thresholds[label]
                            this_label[
                                "boolean"] = average >= thresholds[label]

                        # Update database with all information from this classifier
                        database[name] = storage

                    # Remove classifiers in database that are not active
                    elif name in database:
                        del database[name]

                self.database = database

                self.file_queue.task_done()

    def load_classifiers(self, input_string):
        for name in input_string.split(","):

            # Check if classifier has already been loaded
            if name not in self.loaded:
                logger.debug("Loading classifier %s " % (name))

                # Read attributes from library and initialise
                try:
                    attr = self.library[name]
                    output = {}
                    output["labels"] = dataset_utils.read_label_file(
                        attr["labels"])
                    output["model"] = ClassificationEngine(attr["model"])
                    output["thresholds"] = attr["thresholds"]
                    self.loaded[name] = output
                except KeyError:
                    raise KeyError("Classifier name not found in database")
                except FileNotFoundError:
                    raise FileNotFoundError(
                        "Model or labels not found in models folder")

            else:
                logger.debug("Classifier already loaded %s " % (name))

    def set_classifiers(self, input_string):
        for name in input_string.split(","):

            # Check if classifier has already been loaded
            if name not in self.loaded:
                logger.debug("Classifier not loaded %s: loading " % (name))
                self.load_classifiers(name)
        self.active = input_string.split(",")

    def get_classifiers(self):
        return dumps(self.library)

    def start(self, file_path):
        logger.debug("Calling start")
        self.file_queue.put(file_path)

    def join(self):
        logger.debug("Calling join")
        self.file_queue.join()

    def launch(self):
        logger.debug("Initialising classification worker")
        self.thread = Thread(target=self._worker, daemon=True)
        self.thread.start()

    def quit(self):
        self.quit_event.set()
        logger.debug("Waiting for classification thread to finish")
        self.thread.join()
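_worker above blocks on the file queue with a short timeout so the thread can periodically check quit_event and shut down cleanly. A bare-bones sketch of that timeout-plus-event pattern with placeholder names:

from queue import Queue, Empty
from threading import Event, Thread

file_queue, quit_event = Queue(), Event()

def worker():
    while True:
        try:
            item = file_queue.get(block=True, timeout=0.1)
        except Empty:
            if quit_event.is_set():
                break           # nothing pending and we were asked to stop
            continue
        print("classifying", item)
        file_queue.task_done()

t = Thread(target=worker, daemon=True)
t.start()

file_queue.put("frame-001.jpg")
file_queue.join()               # wait for the frame to be processed
quit_event.set()                # then ask the worker to exit
t.join()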
Example #52
0
        print("Performing work.join()")
        work.join()

        # get the results
        print("Retrieving results and building final index")
        while not results.empty():
            partial = results.get()
            if partial is not None:
                for key, val in partial.items():
                    if key in total_index:
                        total_index[key].extend(val)
                    else:
                        total_index[key] = val

            results.task_done()

        # Load the pre-processed docs into an array
    #     pp_docs_list = list()
    #     while not pp_docs.empty():
    #         entry = pp_docs.get()
    #         if entry is not None:
    #             pp_docs_list.append(entry)
    #        pp_docs.task_done()

    # Generate vector
    #     pp_docs_list = sorted(pp_docs_list, key=lambda k: k["id"])
    #     tfidf_docs = [doc["text"] for doc in pp_docs_list]
    #     tfidf_vectors = list()
    #     cv = CountVectorizer()
    #     word_count_vector = cv.fit_transform(tfidf_docs)
        print("get detail html end")


class GetDetailUrl(threading.Thread):
    def run(self):
        print("get detail url started")
        time.sleep(4)
        print("get detail url end")


if __name__ == '__main__':
    thread1 = GetDetailHtml()
    thread2 = GetDetailUrl()
    start_time = time.time()

    thread1.start()
    thread2.start()

    thread1.join()
    thread2.join()

    print("last time: {}".format(time.time() - start_time))

    queue = Queue(maxsize=1000)
    # put() and get() are the two blocking methods (store / fetch)
    queue.put("xxx")
    queue.get()
    # these two come in a pair: join() waits for the matching task_done()
    queue.task_done()
    queue.join()
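As the comments above note, put()/get() block and join() waits for one task_done() per item that was put. A short demonstration of that pairing:

from queue import Queue
from threading import Thread

q = Queue()

def consume():
    while True:
        item = q.get()          # blocks until an item is available
        print("handled", item)
        q.task_done()           # one task_done() per completed get()

Thread(target=consume, daemon=True).start()

for i in range(3):
    q.put(i)
q.join()                        # returns only after three task_done() calls
print("all items processed")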
Example #54
0
class LaGou(object):
    sleeping = 10

    def __init__(self, search):
        self.search = search
        option = webdriver.ChromeOptions()
        option.add_argument(
            '--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.101 Safari/537.36'
        )
        self.drive = webdriver.Chrome(chrome_options=option)
        # self.drive = webdriver.Chrome()
        # self.url = 'https://www.lagou.com/jobs/list_python?gj={}px=new&gx={}&city={}'.format('3年及以下', '全职', '北京')
        self.url = 'https://www.lagou.com/jobs/list_python{}'.format(search)
        self.drive.get(self.url)
        print("get_refer")
        # print(self.drive.get_cookies())
        # {cookie['name']: cookie['value'] for cookie in driver.get_cookies()}

        self.fin_queue = Queue()
        self.data_queue = Queue()

    @classmethod
    def time_wait(cls):
        time.sleep(cls.sleeping)
        cls.sleeping *= 2
        cls.sleeping = int(cls.sleeping)
        if cls.sleeping >= 10420:
            cls.sleeping = 1

    def crawl_desc(self):
        while True:
            slp = random.randint(1, 10)
            time.sleep(slp * 5)
            cur_data = self.data_queue.get()
            self.time_wait()
            url = cur_data.get('work_url')
            crawl_desc = JobDesc(url)
            detail = crawl_desc.run()
            data = dict(cur_data, **detail)

            self.fin_queue.put(data)
            self.data_queue.task_done()

    def parse_data(self):
        node_list = self.drive.find_elements_by_xpath(
            '//*[@id="s_position_list"]/ul/li/div[@class="list_item_top"]')
        node_li_len = len(node_list)
        print("cur_page_len: " + str(node_li_len))

        for node in node_list:
            temp = {}

            company_node = node.find_element_by_xpath(
                './/div[@class="company_name"]/a')
            temp['company'] = company_node.text
            temp['com_url'] = company_node.get_attribute('href')
            # print(temp['company'])
            # print(temp['com_url'])

            work_node = node.find_element_by_xpath(
                './/a[@class="position_link"]')
            temp['work'] = work_node.find_element_by_xpath('./h3').text
            temp['work_place'] = work_node.find_element_by_xpath('./span').text
            temp['work_url'] = work_node.get_attribute('href')
            print("work: " + temp['work'])
            print("work_place: " + temp['work_place'])
            print("work_url: " + temp['work_url'])

            temp['salary'] = node.find_element_by_xpath(
                './/div[@class="li_b_l"]/span').text
            # print(temp['salary'])

            # self.crawl_desc(temp['work_url'])
            self.data_queue.put(temp)

        return node_li_len

    def __del__(self):
        self.drive.close()

    def go_to_url(self, ct, tm, wk):
        city = self.drive.find_element_by_link_text(ct)
        # ActionChains(self.drive).move_to_element(city).click(city).perform()
        city.click()

        exp = self.drive.find_element_by_xpath(
            '//*[@id="filterCollapse"]/li[1]/a[' + tm + ']')
        exp.click()

        # work = self.drive.find_element_by_xpath('//*[@id="order"]/li/div[3]/div/ul/li[2]/a')
        # work.click()

        print("get: " + self.drive.current_url)

    def save_data(self):
        while True:
            data = self.fin_queue.get()
            print("save_file")
            with open('lagou_' + self.search + '.json', 'a') as f:
                res = json.dumps(data, ensure_ascii=False) + ',\n'
                f.write(res)
            self.fin_queue.task_done()

    def run(self):
        self.go_to_url('北京', WORK_EXP['3年及以下'], WORK['全职'])
        page_len = self.drive.find_element_by_xpath(
            '//span[@page][last()]').text
        print(page_len)
        thread_list = []
        for i in range(int(page_len)):
            time.sleep(1)
            data_len = self.parse_data()
            for i in range(data_len):
                t = threading.Thread(target=self.crawl_desc)
                thread_list.append(t)

                t.setDaemon(True)
                t.start()

            self.data_queue.join()

            thread_save = threading.Thread(target=self.save_data)
            thread_save.setDaemon(True)
            thread_save.start()

            self.fin_queue.join()

            # self.save_data()
            cur_page = self.drive.find_element_by_class_name(
                'pager_is_current').text
            print("cur_page: " + cur_page)
            next_node = self.drive.find_element_by_class_name('pager_next')
            # print(next_node.text)
            next_node.click()
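run() above chains two queues: daemon workers move parsed items from data_queue to fin_queue, a saver thread drains fin_queue, and the main thread joins each queue in turn. A compact sketch of that two-stage pipeline with placeholder work:

from queue import Queue
from threading import Thread

data_queue, fin_queue = Queue(), Queue()

def enrich():
    # stage 1: consume raw items, produce enriched records
    while True:
        item = data_queue.get()
        fin_queue.put({'item': item, 'detail': 'fetched'})
        data_queue.task_done()

def save():
    # stage 2: persist enriched records
    while True:
        record = fin_queue.get()
        print("saving", record)
        fin_queue.task_done()

Thread(target=enrich, daemon=True).start()
Thread(target=save, daemon=True).start()

for i in range(3):
    data_queue.put(i)
data_queue.join()   # all raw items enriched
fin_queue.join()    # all enriched records saved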
Example #55
0
class Scrape:
    def __init__(self, host, id):
        self.host = host
        self.baseurl = "https://{}/presentation/{}".format(host, id)
        self.id = id

    def create_output_dir(self):
        self.out = "bbb-scrape-{}".format(self.id)
        try:
            os.mkdir(self.out)
        except FileExistsError:
            pass

    def fetch_shapes(self, force=False):
        file = os.path.join(self.out, "shapes.svg")
        if not os.path.exists(file) or force:
            url = "{}/shapes.svg".format(self.baseurl)
            shapes = requests.get(url)
            self.shapes = ElementTree.fromstring(shapes.content)
            open(file, "wb").write(shapes.content)
        else:
            self.shapes = ElementTree.fromstring(open(file, "r").read())

    def fetch_deskshare(self, force=False):
        fname = os.path.join(self.out, "deskshare.mp4")
        fname2 = os.path.join(self.out, "deskshare.xml")
        fname3 = os.path.join(self.out, "deskshare.webm")
        if not os.path.exists(fname) or force:
            url = "{}/deskshare/deskshare.mp4".format(self.baseurl)
            req = requests.get(url)
            if req.status_code == 200:
                print("reading ", url)
                open(fname, "wb").write(req.content)
            else:
                url = "{}/deskshare/deskshare.webm".format(self.baseurl)
                req = requests.get(url)
                if req.status_code == 200:
                    print("reading ", url)
                    open(fname3, "wb").write(req.content)
                else:
                    print("could not read deskshare video")
            url = "{}/deskshare.xml".format(self.baseurl)
            req = requests.get(url)
            print("deskshare.xml URL", url)
            if req.status_code == 200:
                open(fname2, "wb").write(req.content)
                return True
        fname = os.path.join(self.out, "deskshare.webm")
        if not os.path.exists(fname) or force:
            url = "{}/deskshare/deskshare.webm".format(self.baseurl)
            req = requests.get(url)
            if req.status_code == 200:
                open(fname, "wb").write(req.content)
                return True
        return False

    def url_file_exists(self, fname):
        url = "{}/video/{}".format(self.baseurl, fname)
        req = requests.get(url)
        if req.status_code == 200:
            return True
        else:
            return False

    def has_webcam_video(self):
        return self.url_file_exists("webcams.mp4")

    def has_webcam_audio_only(self):
        return self.url_file_exists("webcams.webm")

    def fetch_webcams(self, force=False):
        if self.has_webcam_video():
            url_fname = "webcams.mp4"
            fname = os.path.join(self.out, url_fname)
            print("fetching webcam video stream")
        else:
            url_fname = "webcams.webm"
            fname = os.path.join(self.out, url_fname)
            print("fetching webcam audio stream")
            if not self.has_webcam_audio_only():
                print("ERROR!!!")
        if not os.path.exists(fname) or force:
            url = "{}/video/{}".format(self.baseurl, url_fname)
            print("URL", url)
            req = requests.get(url)
            if req.status_code == 200:
                open(fname, "wb").write(req.content)
                return True
        fname = os.path.join(self.out, "webcams.webm")
        if not os.path.exists(fname) or force:
            url = "{}/video/webcams.webm".format(self.baseurl)
            req = requests.get(url)
            if req.status_code == 200:
                open(fname, "wb").write(req.content)
                return True
        return False

    def fetch_image(self, force=False):
        while self.workq.qsize() > 0:
            e = self.workq.get()
            href = e.attrib["{http://www.w3.org/1999/xlink}href"]
            try:
                uuid = os.path.dirname(href).split("/")[1]
                fname = "{}-{}".format(uuid, os.path.basename(href))
            except IndexError:
                fname = os.path.basename(href)
            file = os.path.join(self.out, fname)
            if not os.path.exists(file) or force:
                url = "{}/{}".format(self.baseurl, href)
                image = requests.get(url)
                open(file, "wb").write(image.content)
            e.attrib["{http://www.w3.org/1999/xlink}href"] = fname
            if "id" in e.attrib:
                self.images.append(
                    Image(id=e.attrib["id"],
                          fname=fname,
                          ts_in=float(e.attrib["in"]),
                          ts_out=float(e.attrib["out"])))
            self.workq.task_done()

    def fetch_images(self, tree=None, force=False):
        if tree is None:
            self.images = []
            self.workq = Queue()
            self.fetch_images(self.shapes, force)
            # Drain the work queue so every image is downloaded, its href
            # rewritten, and self.images populated before shapes.svg is saved.
            self.fetch_image(force)
            fname = os.path.join(self.out, "shapes.svg")
            open(fname, "wb").write(ElementTree.tostring(self.shapes))
            return

        for e in tree.findall("svg:image", namespaces):
            self.workq.put(e)
        for e in tree:
            self.fetch_images(e, force)

    def read_timestamps(self, tree=None):
        if tree is None:
            self.timestamps = []
            self.read_timestamps(self.shapes)
            self.timestamps = list(dict.fromkeys(self.timestamps))
            self.timestamps.sort()
            return

        for e in tree:
            if "in" in e.attrib:
                self.timestamps.append(float(e.attrib["in"]))
            if "out" in e.attrib:
                self.timestamps.append(float(e.attrib["out"]))
            if "timestamp" in e.attrib:
                self.timestamps.append(float(e.attrib["timestamp"]))
            self.read_timestamps(e)

    def generate_frames(self, force=False):
        self.frames = {}

        self.workq = Queue()

        t = 0.0
        for ts in self.timestamps[1:]:
            self.workq.put((t, ts))
            t = ts

        self.generate_frame(force)

    def generate_frame(self, force=False):
        while self.workq.qsize() > 0:
            (timestamp, ts_out) = self.workq.get()
            fname = "shapes{}.png".format(timestamp)
            fnamesvg = "shapes{}.svg".format(timestamp)
            if not os.path.exists(os.path.join(self.out, fnamesvg)) or force:
                shapes = copy.deepcopy(self.shapes)
                image = None
                for i in self.images:
                    if timestamp >= i.ts_in and timestamp < i.ts_out:
                        image = i.id
                for e in shapes.findall("svg:image", namespaces):
                    if e.attrib["id"] == image:
                        e.attrib["style"] = ""
                    else:
                        shapes.remove(e)
                for e in shapes.findall("svg:g", namespaces):
                    assert (e.attrib["class"] == "canvas")
                    if e.attrib["image"] == image:
                        e.attrib["display"] = "inherit"
                        self.make_visible(e, timestamp)
                    else:
                        shapes.remove(e)
                shapestr = ElementTree.tostring(shapes)
                open(os.path.join(self.out, fnamesvg), "wb").write(shapestr)

            if not os.path.exists(os.path.join(self.out, fname)) or force:
                result = subprocess.run([
                    "inkscape", "--export-png={}".format(fname),
                    "--export-area-drawing", fnamesvg
                ],
                                        cwd=self.out,
                                        stdout=subprocess.PIPE,
                                        stderr=subprocess.PIPE)

            frame = Frame(fname=fname, ts_in=timestamp, ts_out=ts_out)
            self.frames[timestamp] = frame
            self.workq.task_done()

    def make_visible(self, tree, timestamp):
        for e in tree.findall("svg:g", namespaces):
            if ("timestamp" in e.attrib
                    and float(e.attrib["timestamp"]) <= timestamp):
                style = e.attrib["style"].split(";")
                style.remove("visibility:hidden")
                e.attrib["style"] = ";".join(style)
            else:
                tree.remove(e)

    def generate_concat(self):
        f = open(os.path.join(self.out, "concat.txt"), "w")
        for t in self.timestamps[0:-1]:
            frame = self.frames[t]
            f.write("file '{}'\n".format(frame.fname))
            f.write("duration {:f}\n".format(frame.ts_out - frame.ts_in))
        f.write("file '{}'\n".format(self.frames[self.timestamps[-2]].fname))
        f.close()

    def render_slides(self):
        result = subprocess.run([
            "ffmpeg", "-f", "concat", "-i", "concat.txt", "-pix_fmt",
            "yuv420p", "-y", "-vf", "scale=-2:720", "slides.mp4"
        ],
                                cwd=self.out,
                                stderr=subprocess.PIPE)
Example #56
0
class Mirrors(object):
    """Base for collection of archive mirrors"""
    def __init__(self, url_list, ping_only, min_status):
        self.urls = {}
        self._url_list = url_list
        self._num_trips = 0
        self.got = {"ping": 0, "data": 0}
        self.ranked = []
        self.top_list = []
        self._trip_queue = Queue()
        if not ping_only:
            self._launchpad_base = "https://launchpad.net"
            self._launchpad_url = (self._launchpad_base +
                                   "/ubuntu/+archivemirrors")
            self._launchpad_html = ""
            self.abort_launch = False
            self._status_opts = ("unknown", "One week behind",
                                 "Two days behind", "One day behind",
                                 "Up to date")
            index = self._status_opts.index(min_status)
            self._status_opts = self._status_opts[index:]
            # Default to requesting only the top mirror
            self.status_num = 1

    def get_launchpad_urls(self):
        """Obtain mirrors' corresponding launchpad URLs"""
        stderr.write("Getting list of launchpad URLs...")
        try:
            self._launchpad_html = get_text(self._launchpad_url)
        except URLGetTextError as err:
            stderr.write(
                ("%s: %s\nUnable to retrieve list of launchpad sites\n"
                 "Reverting to latency only" % (self._launchpad_url, err)))
            self.abort_launch = True
        else:
            stderr.write("done.\n")
            self.__parse_launchpad_list()

    def __parse_launchpad_list(self):
        """Parse Launchpad's list page to find each mirror's
           Official page"""
        soup = BeautifulSoup(self._launchpad_html, PARSER)
        prev = ""
        for element in soup.table.descendants:
            try:
                url = element.a
            except AttributeError:
                pass
            else:
                try:
                    url = url["href"]
                except TypeError:
                    pass
                else:
                    if url in self.urls:
                        self.urls[url]["Launchpad"] = (self._launchpad_base +
                                                       prev)

                    if url.startswith("/ubuntu/+mirror/"):
                        prev = url

    def __kickoff_trips(self):
        """Instantiate round trips class for all, initiating queued threads"""

        for url in self._url_list:
            host = urlparse(url).netloc
            try:
                thread = Thread(
                    target=_RoundTrip(url, host, self._trip_queue).min_rtt)
            except gaierror as err:
                stderr.write("%s: %s ignored\n" % (err, url))
            else:
                self.urls[url] = {"Host": host}
                thread.daemon = True
                thread.start()
                self._num_trips += 1

    def get_rtts(self):
        """Test latency to all mirrors"""

        stderr.write("Testing latency to mirror(s)\n")
        self.__kickoff_trips()

        processed = 0
        progress_msg(processed, self._num_trips)
        for _ in range(self._num_trips):
            try:
                min_rtt = self._trip_queue.get(block=True)
            except Empty:
                pass
            else:
                # we can ignore empty rtt results (None) from the queue
                # as in this case ConnectError was already raised
                if min_rtt:
                    self._trip_queue.task_done()
                    self.urls[min_rtt[0]].update({"Latency": min_rtt[1]})
                    self.got["ping"] += 1

            processed += 1
            progress_msg(processed, self._num_trips)

        stderr.write('\n')
        # Mirrors without latency info are removed
        self.urls = {
            key: val
            for key, val in self.urls.items() if "Latency" in val
        }

        self.ranked = sorted(self.urls, key=lambda x: self.urls[x]["Latency"])

    def __queue_lookups(self, codename, arch, data_queue):
        """Queue threads for data retrieval from launchpad.net

           Returns number of threads started to fulfill number of
           requested statuses"""
        num_threads = 0
        for url in self.ranked:
            try:
                launch_url = self.urls[url]["Launchpad"]
            except KeyError:
                pass
            else:
                thread = Thread(target=_LaunchData(url, launch_url, codename,
                                                   arch, data_queue).get_info)
                thread.daemon = True
                thread.start()

                num_threads += 1

            # The number of statuses already retrieved may be non-zero if an
            # earlier pass hit errors, so only start enough threads to cover
            # the remainder.
            if num_threads == (self.status_num - self.got["data"]):
                break

        return num_threads

    def lookup_statuses(self, codename, arch, min_status):
        """Scrape statuses/info in from launchpad.net mirror pages"""
        while (self.got["data"] < self.status_num) and self.ranked:
            data_queue = Queue()
            num_threads = self.__queue_lookups(codename, arch, data_queue)
            if num_threads == 0:
                break
            # Get output of all started thread methods from queue
            progress_msg(self.got["data"], self.status_num)
            for _ in range(num_threads):
                try:
                    # We don't care about timeouts longer than 7 seconds as
                    # we're only getting 16 KB
                    info = data_queue.get(block=True, timeout=7)
                except Empty:
                    pass
                else:
                    data_queue.task_done()
                    if info[1] and info[1]["Status"] in self._status_opts:
                        self.urls[info[0]].update(info[1])
                        self.got["data"] += 1
                        self.top_list.append(info[0])
                        progress_msg(self.got["data"], self.status_num)

                    # Remove the url from the ranked list whenever anything is
                    # received from the queue, so a follow-up pass only builds
                    # its queue from mirrors not yet tried.
                    self.ranked.remove(info[0])

                if (self.got["data"] == self.status_num):
                    break

            # Queue results arrive in completion order, so re-sort the
            # final list by latency.
            self.top_list.sort(key=lambda x: self.urls[x]["Latency"])

            data_queue.join()
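A hedged driver sketch for the Mirrors class: the mirror URLs, release codename, and architecture below are placeholder values, and module helpers such as _RoundTrip, _LaunchData, get_text, and progress_msg are assumed to be defined elsewhere in the same file.

# Illustrative only; URL list and release/arch values are assumptions.
mirrors = Mirrors(
    ["http://archive.ubuntu.com/ubuntu/", "http://mirror.example.net/ubuntu/"],
    ping_only=False,
    min_status="Up to date",
)
mirrors.get_rtts()               # latency test; fills mirrors.ranked
mirrors.get_launchpad_urls()     # map each mirror to its launchpad page
if not mirrors.abort_launch:
    mirrors.lookup_statuses("jammy", "amd64", "Up to date")
print(mirrors.top_list)          # best mirrors, ordered by latency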
Example #57
0
class QiubaiSpider:
    def __init__(self):
        self.url_temp = "https://www.qiushibaike.com/text/{}"
        self.headers = {
            "Referer": "http://www.qiushibaike.com",
            "User-Agent":
            "Mozilla/5.0 (Platform; Encryption; OS-or-CPU; Language)"
        }
        self.url_queue = Queue()
        self.html_queue = Queue()
        self.content_queue = Queue()

    def get_url(self):
        # return [self.url_temp.format(i) for i in range(1, 14)]
        for i in range(1, 14):
            self.url_queue.put(self.url_temp.format(i))

    def parse_url(self):
        while True:
            url = self.url_queue.get()
            print(url)
            resp = requests.get(url, headers=self.headers)
            # return resp.text
            self.html_queue.put(resp.text)
            self.url_queue.task_done()  # without task_done(), url_queue.join() would hang

    def get_content_list(self):
        while True:
            html_str = self.html_queue.get()
            html = etree.HTML(html_str)
            div_list = html.xpath("//div[@class='col1 old-style-col1']/div")
            content_lst = []
            for div in div_list:
                item = {}

                item["content"] = div.xpath(
                    ".//div[@class='content']/span/text()")
                item["content"] = [
                    i.replace("\n", "") for i in item["content"]
                ]

                item["author_gender"] = div.xpath(
                    ".//div[contains(@class,'articleGender')]/@class")
                item["author_gender"] = item["author_gender"][0].replace(
                    "Icon", "") if len(item["author_gender"]) > 0 else None

                item["author_age"] = div.xpath(
                    ".//div[contains(@class,'articleGender')]/text()")
                item["author_age"] = item["author_age"][0] if len(
                    item["author_age"]) > 0 else None

                item["author_pic"] = div.xpath(
                    ".//div[@class='author clearfix']/a/img/@src")
                item["author_pic"] = "https://" + item["author_pic"][0] if len(
                    item["author_pic"]) > 0 else None

                item["vote_number"] = div.xpath(
                    ".//span[@class='stats-vote']/i/text()")
                item["vote_number"] = item["vote_number"][0] if len(
                    item["vote_number"]) > 0 else None

                item["review_count"] = div.xpath(
                    ".//span[@class='stats-comments']/a/i/text()")
                item["review_count"] = item["review_count"][0] if len(
                    item["review_count"]) > 0 else None
                content_lst.append(item)
            # return content_lst
            self.content_queue.put(content_lst)
            self.html_queue.task_done()

    def save(self):
        while True:
            content_list = self.content_queue.get()
            with open("多线程糗事.json", "a", encoding="utf8") as f:
                for data in content_list:
                    f.write(json.dumps(data, ensure_ascii=False))
                    f.write("\n")
            # for data in content_list:
            #     print(data)
            self.content_queue.task_done()

    def run(self):
        thread_list = []
        t_url = threading.Thread(target=self.get_url)
        thread_list.append(t_url)
        for i in range(3):
            t_parse = threading.Thread(target=self.parse_url)
            thread_list.append(t_parse)
        for i in range(2):
            t_html = threading.Thread(target=self.get_content_list)
            thread_list.append(t_html)
        t_save = threading.Thread(target=self.save)
        thread_list.append(t_save)

        for t in thread_list:
            t.daemon = True  # make workers daemon threads so they die with the main thread
            t.start()
        for q in [self.url_queue, self.html_queue, self.content_queue]:
            q.join()  # block the main thread until every queued task is done
        print("main thread finished")
Example #58
0
class QA_Thread(threading.Thread):
    '''
    A simple producer/consumer model that runs on its own worker thread.

    It wraps a queue of QA_Task objects; the callback matters, since a
    function can be bound to each task when it is scheduled.
    QA_Engine inherits from this class.
    '''
    def __init__(self, queue=None, name=None):
        threading.Thread.__init__(self)
        self.queue = Queue() if queue is None else queue
        self.thread_stop = False
        self.__flag = threading.Event()  # event used to pause the thread
        self.__flag.set()  # initially True (not paused)
        self.__running = threading.Event()  # event used to stop the thread
        self.__running.set()  # set running to True
        self.name = QA_util_random_with_topic(topic='QA_Thread',
                                              lens=3) if name is None else name
        self.idle = False

    def __repr__(self):
        return '<QA_Thread{}  id={}>'.format(self.name, id(self))

    def run(self):
        while self.__running.is_set():
            self.__flag.wait()
            while not self.thread_stop:
                # a blocking queue, so no message gets lost
                try:
                    if self.queue.empty() is False:
                        _task = self.queue.get()  # receive a task
                        assert isinstance(_task, QA_Task)
                        if _task.worker is not None:
                            _task.do()
                            self.queue.task_done()  # mark one task as done
                        else:
                            pass
                    else:
                        self.idle = True
                        # the fan spins up on a MacBook; a short sleep lowers CPU usage
                        # time.sleep(0.01)
                except Exception as e:
                    raise e

    def pause(self):
        self.__flag.clear()

    def resume(self):
        self.__flag.set()  # set to True so the thread stops blocking on wait()

    def stop(self):
        # self.__flag.set()       # resume first if the thread is paused
        self.__running.clear()
        self.thread_stop = True  # end the inner run loop

    def __start(self):
        self.queue.start()

    def put(self, task):
        self.queue.put(task)

    def put_nowait(self, task):
        self.queue.put_nowait(task)

    def get(self):
        return self.queue.get()

    def get_nowait(self):
        return self.queue.get_nowait()

    def qsize(self):
        return self.queue.qsize()
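A hedged usage sketch for QA_Thread; QA_Task comes from the same project and its constructor is not shown here, so the worker argument below is an assumption.

# Illustrative only: QA_Task's real signature lives in the QUANTAXIS codebase.
def some_job():
    print('task executed')

t = QA_Thread(name='demo')
t.start()                          # begin draining the queue in the background
t.put(QA_Task(worker=some_job))    # assumed constructor; binds a callable to the task
t.pause()                          # clear the pause Event
t.resume()                         # set it again so run() continues
t.stop()                           # clear the running flag and end the loop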
Example #59
0
class TwitterListener(StreamListener):
    """A listener class for handling streaming Twitter data."""

    def __init__(self, callback, logs_to_cloud):
        self.logs_to_cloud = logs_to_cloud
        self.logs = Logs(name='twitter-listener', to_cloud=self.logs_to_cloud)
        self.callback = callback
        self.error_status = None
        self.start_queue()

    def start_queue(self):
        """Creates a queue and starts the worker threads."""

        self.queue = Queue()
        self.stop_event = Event()
        self.logs.debug('Starting %s worker threads.' % NUM_THREADS)
        self.workers = []
        for worker_id in range(NUM_THREADS):
            worker = Thread(target=self.process_queue, args=[worker_id])
            worker.daemon = True
            worker.start()
            self.workers.append(worker)

    def stop_queue(self):
        """Shuts down the queue and worker threads."""

        # First stop the queue.
        if self.queue:
            self.logs.debug('Stopping queue.')
            self.queue.join()
        else:
            self.logs.warn('No queue to stop.')

        # Then stop the worker threads.
        if self.workers:
            self.logs.debug('Stopping %d worker threads.' % len(self.workers))
            self.stop_event.set()
            for worker in self.workers:
                # Block until the thread terminates.
                worker.join()
        else:
            self.logs.warn('No worker threads to stop.')

    def process_queue(self, worker_id):
        """Continuously processes tasks on the queue."""

        # Create a new logs instance (with its own httplib2 instance) so that
        # there is a separate one for each thread.
        logs = Logs('twitter-listener-worker-%s' % worker_id,
                    to_cloud=self.logs_to_cloud)

        logs.debug('Started worker thread: %s' % worker_id)
        while not self.stop_event.is_set():
            try:
                data = self.queue.get(block=True, timeout=QUEUE_TIMEOUT_S)
                start_time = time()
                self.handle_data(logs, data)
                self.queue.task_done()
                end_time = time()
                qsize = self.queue.qsize()
                logs.debug('Worker %s took %.f ms with %d tasks remaining.' %
                           (worker_id, (end_time - start_time) * 1000, qsize))
            except Empty:
                logs.debug('Worker %s timed out on an empty queue.' %
                           worker_id)
                continue
            except Exception:
                # The main loop doesn't catch and report exceptions from
                # background threads, so do that here.
                logs.catch()
        logs.debug('Stopped worker thread: %s' % worker_id)

    def on_error(self, status):
        """Handles any API errors."""

        self.logs.error('Twitter error: %s' % status)
        self.error_status = status
        self.stop_queue()
        return False

    def get_error_status(self):
        """Returns the API error status, if there was one."""
        return self.error_status

    def on_data(self, data):
        """Puts a task to process the new data on the queue."""

        # Stop streaming if requested.
        if self.stop_event.is_set():
            return False

        # Put the task on the queue and keep streaming.
        self.queue.put(data)
        return True

    def handle_data(self, logs, data):
        """Sanity-checks and extracts the data before sending it to the
        callback.
        """

        try:
            tweet = loads(data)
        except ValueError:
            logs.error('Failed to decode JSON data: %s' % data)
            return

        try:
            user_id_str = tweet['user']['id_str']
            screen_name = tweet['user']['screen_name']
        except KeyError:
            logs.error('Malformed tweet: %s' % tweet)
            return

        # We're only interested in tweets from Mr. Trump himself, so skip the
        # rest.
        if user_id_str != TRUMP_USER_ID:
            logs.debug('Skipping tweet from user: %s (%s)' %
                       (screen_name, user_id_str))
            return

        logs.info('Examining tweet: %s' % tweet)

        # Call the callback.
        self.callback(tweet)
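A small sketch of driving the listener directly, without a live Twitter stream; the JSON payload is a labeled stand-in, and Logs, NUM_THREADS, QUEUE_TIMEOUT_S, and TRUMP_USER_ID are module-level names assumed from the surrounding example.

# Illustrative only; the tweet payload below is a fabricated stand-in.
def handle_tweet(tweet):
    print('callback got tweet id', tweet.get('id_str'))

listener = TwitterListener(callback=handle_tweet, logs_to_cloud=False)
fake_tweet = ('{"id_str": "1", "user": {"id_str": "%s", "screen_name": "x"}}'
              % TRUMP_USER_ID)
listener.on_data(fake_tweet)   # enqueues the payload for a worker thread
listener.stop_queue()          # waits for the queue to drain, then joins workers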
Example #60
0
def api_worker(asin_q: Queue, processed_q: Queue, blocker_q: Queue,
               marketplace_id: str):  # pragma: no cover
    """
    Worker function for threading out api calls

    Args:
        asin_q: Queue with ASINs to be processed on MWS API
        processed_q: ASINs that have already been processed
        blocker_q: ASINs that keep being tried but fail
        marketplace_id: String representation of the marketplace

    """
    while True:
        queue_asin = asin_q.get()

        log_asins = ', '.join(queue_asin)

        logging.info("Processing ASINs: %s" % log_asins)

        try:
            asin_data_dict = mws_api.acquire_mws_product_data(
                marketplace_id, queue_asin)
        except Exception as e:
            logging.error(e)
            logging.warning("API is throttling")
            logging.warning("Pausing for a minute")
            asin_q.task_done()
            # Re-queue only ASINs that have not already failed once;
            # previous failures are tracked in blocker_q.
            retry_asins = [asin for asin in queue_asin
                           if asin not in blocker_q.queue]
            if retry_asins:
                asin_q.put(retry_asins)
            for item in retry_asins:
                blocker_q.put(item)
            searcher.time.sleep(60)
            continue

        searcher.timeout()

        logging.info("Serializing Raw Data for %s" % log_asins)
        serialize_data_to_json(asin_data_dict['raw_data'],
                               Path.home() / config.DATA_DIRECTORY)

        logging.info("Save complete")

        attributes = []
        relationships = []
        for data in asin_data_dict['raw_data']:
            attributes.append(
                flatten_item_attributes(
                    data['Product']['AttributeSets']['ItemAttributes']))
            relationships = relationships + extract_relationships_from_json(
                data['ASIN']['value'], data['Product']['Relationships'])

        write_path = Path.home() / config.DATA_DIRECTORY

        logging.info('Serializing Raw JSON Response')
        serialize_data_to_json(asin_data_dict['raw_data'], write_path)
        logging.info('Saved Raw JSON')

        logging.info(
            'Serializing Two Dimensional Representation of Attributes')
        serialize_data_to_csv(attributes, write_path, extension='txt')
        logging.info('Saved Two Dimensional Representation of Attributes')

        logging.info('Serializing Relationships')
        serialize_data_to_csv(relationships, write_path, extension='dat')
        logging.info('Saved Relationships')

        logging.info('Serializing target values to csv')
        serialize_data_to_csv(asin_data_dict['target_values'], write_path)
        logging.info('Saved target values')

        for asin in queue_asin:
            processed_q.put(asin, block=False)

        related_asins = [
            related_dict['asin'] for related_dict in relationships
        ]
        asins_add = grouper(5, related_asins)

        for group in asins_add:
            # Drop padding values and ASINs that were already processed.
            group = [asin for asin in group
                     if asin and asin not in processed_q.queue]
            if group:
                asin_q.put(group)

        asin_q.task_done()
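A hedged sketch of how api_worker might be wired up with daemon threads; the seed ASINs, thread count, and the assumption that grouper follows the itertools-recipe semantics (padding short groups with None) are illustrative, not part of the original.

import threading
from queue import Queue

def run_workers(seed_asins, marketplace_id, num_workers=4):
    # Illustrative driver only; names and counts are assumptions.
    asin_q, processed_q, blocker_q = Queue(), Queue(), Queue()
    for group in grouper(5, seed_asins):       # batches of up to 5 ASINs
        asin_q.put([a for a in group if a])    # drop grouper padding
    for _ in range(num_workers):
        t = threading.Thread(target=api_worker,
                             args=(asin_q, processed_q, blocker_q, marketplace_id))
        t.daemon = True
        t.start()
    asin_q.join()                              # wait until every batch is processed
    return list(processed_q.queue)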