Example 1
    def __init__(self):
        self.net_hand = NetworkHandler(self.parse_message)
        #self.cursor_thread_run = True
        #self.cursor_thread = Thread(target=self.track_cursor)
        #self.cursor_thread.setDaemon(True)
        #self.u2_pos = None
        self.names = {}
        self.cursor_colors = ['red', 'green', 'blue', 'yellow', 'cyan']

        self.autosave_thread = Thread(target=self.autosave_thread)
        self.autosave_thread.setDaemon(True)

        self.current_terminal_buffer_column = 0
        self.current_terminal_buffer_line = 0
        self.log = logging.getLogger('jumpy')

        self.mac = hex(uuid.getnode())
        self.is_host = False
        self.have_perms = False

        self.mac_name = dict()

        self.workspace = Workspace()

        self.create()
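
Note that the Thread object above is stored under the same attribute name as the method it runs (autosave_thread), and setDaemon() has been deprecated since Python 3.10 in favor of the daemon flag. A minimal sketch of the more conventional shape (class and method names here are placeholders, not the project's):

from threading import Thread
import time

class AutosaveExample:
    def __init__(self):
        # Keep the Thread under its own name so it does not shadow the method it runs,
        # and mark it as a daemon at construction time.
        self._autosave_worker = Thread(target=self.autosave_loop, daemon=True)
        self._autosave_worker.start()

    def autosave_loop(self):
        # Placeholder body standing in for the real autosave logic.
        while True:
            time.sleep(30)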
Example 2
    def __init__(self):
        """ Initialization """

        self.conf = ConfReader("crawler.conf", default_conf)
        self.logger = Logger()
        self.db = None

        self.thread_pool_size = self.conf.get("thread_pool_size")
        self.left_ip = self.conf.get("manager_ip")
        self.left_port = self.conf.get("manager_port")
        self._buffer_size_threshold = self.conf.get("buffer_size_threshold")
        # how many links we should return when the caller calls self.get_links()
        self._crawl_NR = self.conf.get("concurrent_crawl_NR")
        self.my_open = uopen if self.conf.get(
            "buffer_output") == "no" else open
        self.content_path = self.conf.get("content_path")
        self.crawling_timeout = self.conf.get("crawling_timeout")

        self.DB_url = self.conf.get("DB_url")
        self.DB_user = self.conf.get("DB_user")
        self.DB_passwd = self.conf.get("DB_passwd")
        self.crawler_DB = self.conf.get("crawler_DB")
        self.crawler_table = self.conf.get("crawler_table")
        self.db = DBHandler(self.crawler_DB, self.DB_user, self.DB_passwd,
                            self.DB_url)
        self.db.connect()
        # MySQL columns are case-insensitive for search operations (unlike Oracle);
        # that default can be changed at table-creation time by marking a column
        # as BINARY, as done for the URL columns below.
        self.db.update("CREATE TABLE IF NOT EXISTS `" + self.crawler_table +
                       "` ("
                       " `page_id` int(20) NOT NULL AUTO_INCREMENT,"
                       " `page_url` varchar(200) BINARY NOT NULL,"
                       " `domain_name` varchar(100) BINARY NOT NULL,"
                       " `inner_links` text,"
                       " `outer_links` text,"
                       " `title` varchar(1024),"
                       " `normal_content` text,"
                       " `emphasized_content` text,"
                       " `keywords` varchar(1024),"
                       " `description` varchar(1024),"
                       " `text` longtext,"
                       " `PR_score` double default 0.0,"
                       " `ad_NR` int default 0,"
                       " `tag1` varchar(20) default null,"
                       " `tag2` varchar(20) default null,"
                       " `tag3` varchar(20) default null,"
                       " INDEX (`page_url`),"
                       " PRIMARY KEY (`page_id`)"
                       ")CHARSET=UTF8, ENGINE=InnoDB")
        self.db.update("truncate table " + self.crawler_table)

        # holds all the links to be sent back to the manager
        self._result_dict = {}
        # holds all the links received from the manager
        self._buffer = []
        self.result_sender = NetworkHandler(self.left_ip, self.left_port)
        self.links_requester = NetworkHandler(self.left_ip, self.left_port)
        self.focusing = True  # whether the crawler should do focused crawling
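
One side effect of the truncate call above is that every restart of the crawler empties the table. If rows ever need to survive a restart, the wipe could be gated on a config flag; a sketch (the 'truncate_on_start' key is hypothetical and not part of the shown crawler.conf):

        # Hypothetical: only clear old rows when the configuration asks for it.
        if self.conf.get("truncate_on_start") == "yes":
            self.db.update("truncate table " + self.crawler_table)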
Example 3
    def __init__(self):
        """ Initialization """

        self.conf = ConfReader("crawler.conf", default_conf)
        self.logger = Logger()
        self.db = None

        self.thread_pool_size = self.conf.get("thread_pool_size")
        self.left_ip = self.conf.get("manager_ip")
        self.left_port = self.conf.get("manager_port")
        self._buffer_size_threshold = self.conf.get("buffer_size_threshold")
        # how many links we should return when the caller calls self.get_links()
        self._crawl_NR = self.conf.get("concurrent_crawl_NR")
        self.my_open = uopen if self.conf.get("buffer_output") == "no" else open
        self.content_path = self.conf.get("content_path")
        self.crawling_timeout = self.conf.get("crawling_timeout")

        self.DB_url = self.conf.get("DB_url")
        self.DB_user = self.conf.get("DB_user")
        self.DB_passwd = self.conf.get("DB_passwd")
        self.crawler_DB = self.conf.get("crawler_DB")
        self.crawler_table = self.conf.get("crawler_table")
        self.db = DBHandler(self.crawler_DB, self.DB_user,self.DB_passwd,self.DB_url)
        self.db.connect()
        # MySQL columns are case-insensitive for search operations (unlike Oracle);
        # that default can be changed at table-creation time by marking a column
        # as BINARY, as done for the URL columns below.
        self.db.update("CREATE TABLE IF NOT EXISTS `" + self.crawler_table + "` ("
                       " `page_id` int(20) NOT NULL AUTO_INCREMENT,"
                       " `page_url` varchar(200) BINARY NOT NULL,"
                       " `domain_name` varchar(100) BINARY NOT NULL,"
                       " `inner_links` text,"
                       " `outer_links` text,"
                       " `title` varchar(1024),"
                       " `normal_content` text,"
                       " `emphasized_content` text,"
                       " `keywords` varchar(1024),"
                       " `description` varchar(1024),"
                       " `text` longtext,"
                       " `PR_score` double default 0.0,"
                       " `ad_NR` int default 0,"
                       " `tag1` varchar(20) default null,"
                       " `tag2` varchar(20) default null,"
                       " `tag3` varchar(20) default null,"
                       " INDEX (`page_url`),"
                       " PRIMARY KEY (`page_id`)"
                       ")CHARSET=UTF8, ENGINE=InnoDB" )
        self.db.update("truncate table " + self.crawler_table)

        # holds all the links to be sent back to the manager
        self._result_dict = {}
        # holds all the links received from the manager
        self._buffer = []
        self.result_sender = NetworkHandler(self.left_ip, self.left_port)
        self.links_requester = NetworkHandler(self.left_ip, self.left_port)
        self.focusing = True  # whether the crawler should do focused crawling
Example 4
 def __init__(self, ip, port):
     """ @_source_ip     : ip of the manager
         @_source_port   : port of the manager
         @links_requester: help instance that can help us get links from manager
         @_buffer        : buffer that hold some {url=>resolved_url...}
         @_buffer_size_threshold: if size of buffer is less than this, then we should get some
                                  from manager
         @_nsent         : number of links that we return at a time """
     self._source_ip = ip
     self._source_port = port
     self.links_requester = NetworkHandler(self._source_ip, self._source_port)
     self._buffer = {} #like {url1=>ip1, url2=>ip2, url3=>ip3...}
     self._buffer_size_threshold = 10
     self._nsent = self._buffer_size_threshold #how many links we should return at a time
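
The docstring describes a refill-then-drain buffer, but the method that drains it is not shown here. A sketch of what a get_links() built on these attributes might look like (it assumes links_requester.request() returns a url=>ip dict shaped like _buffer, which this snippet does not confirm):

def get_links(self):
    # Refill from the manager when the buffer runs low.
    if len(self._buffer) < self._buffer_size_threshold:
        self._buffer.update(self.links_requester.request())
    # Hand out at most _nsent entries per call.
    batch = {}
    while self._buffer and len(batch) < self._nsent:
        url, ip = self._buffer.popitem()
        batch[url] = ip
    return batch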
Example 5
    def test_NWK_09():
        """
        Tests the basic network functionality using the NetworkHandler
        """

        # basic logging init
        log = logging.getLogger('jumpy')
        log_format = logging.Formatter(
            '%(filename)s - %(lineno)d - %(levelname)s - %(message)s')
        log.setLevel(logging.DEBUG)

        # logging console init
        log_handler_console = logging.StreamHandler()
        log_handler_console.setLevel(logging.DEBUG)
        log_handler_console.setFormatter(log_format)
        log.addHandler(log_handler_console)

        # imports
        from NetworkHandler import NetworkHandler
        import time

        # base setup
        net_hand = NetworkHandler(None)
        net_hand.unit_testing = True
        net_hand.establish_connection()

        time.sleep(1)

        net_hand.close_connection()
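
The logging boilerplate repeated in these tests (Formatter + StreamHandler + addHandler) can be collapsed with logging.basicConfig; an equivalent sketch:

import logging

logging.basicConfig(
    level=logging.DEBUG,
    format='%(filename)s - %(lineno)d - %(levelname)s - %(message)s')
log = logging.getLogger('jumpy')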
Example 6
 def setupInput(self):
     self.netInput = NetworkHandler()
     self.accept("walk-start", self.beginWalk)
     self.accept("walk-stop", self.endWalk)
     self.accept("reverse-start", self.beginReverse)
     self.accept("reverse-stop", self.endReverse)
     self.accept("walk", self.walk)
     self.accept("reverse", self.reverse)
     self.accept("turn", self.turn)
Example 7
    def test_NWK_10():
        """
        Tests a DataPacket
        """

        # basic logging init
        log = logging.getLogger('jumpy')
        log_format = logging.Formatter(
            '%(filename)s - %(lineno)d - %(levelname)s - %(message)s')
        log.setLevel(logging.DEBUG)

        # logging console init
        log_handler_console = logging.StreamHandler()
        log_handler_console.setLevel(logging.DEBUG)
        log_handler_console.setFormatter(log_format)
        log.addHandler(log_handler_console)

        # imports
        from DataPacket import DataPacket
        from NetworkHandler import NetworkHandler
        import time

        helper = Helper()

        # base setup
        net_hand = NetworkHandler(helper.parse_message)
        net_hand.establish_connection()

        time.sleep(1)

        assert net_hand.is_connected

        packet: DataPacket = DataPacket()
        assert packet.data_dict.keys().__contains__('packet-name')
        assert packet.data_dict.keys().__contains__('mac-addr')
        assert packet.data_dict.keys().__contains__('time-of-creation')
        assert not packet.data_dict.keys().__contains__('time-of-send')

        assert packet.data_dict.get('packet-name').__eq__('DataPacket')
        assert packet.data_dict.get('mac-addr') is not None
        assert packet.data_dict.get('time-of-creation') is not None

        packet.set_time_of_send()
        assert packet.data_dict.keys().__contains__('time-of-send')
        assert packet.data_dict.get('time-of-send') is not None

        net_hand.send_packet(packet)
        time.sleep(1)
        net_hand.close_connection()
        assert helper.packet_received_data_dict is not None
        pass
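
From the assertions above, a DataPacket apparently seeds data_dict with 'packet-name', 'mac-addr' and 'time-of-creation', and set_time_of_send() adds 'time-of-send' afterwards. A hypothetical reconstruction that satisfies this test (not the project's actual class):

import time
import uuid

class DataPacket:
    def __init__(self):
        self.data_dict = {
            'packet-name': type(self).__name__,   # 'DataPacket' for the base class
            'mac-addr': hex(uuid.getnode()),
            'time-of-creation': time.time(),
        }

    def set_time_of_send(self):
        self.data_dict['time-of-send'] = time.time()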
Example 8
class Crawler(object):
    """ The crawler.
        Multiple threads would be started in method run() """

    def __init__(self):
        """ Initialization """

        self.conf = ConfReader("crawler.conf", default_conf)
        self.logger = Logger()
        self.db = None

        self.thread_pool_size = self.conf.get("thread_pool_size")
        self.left_ip = self.conf.get("manager_ip")
        self.left_port = self.conf.get("manager_port")
        self._buffer_size_threshold = self.conf.get("buffer_size_threshold")
        #how many links we should return when the caller calls self.get_links()
        self._crawl_NR = self.conf.get("concurrent_crawl_NR")
        self.my_open = uopen if self.conf.get("buffer_output") == "no" else open
        self.content_path = self.conf.get("content_path")
        self.crawling_timeout = self.conf.get("crawling_timeout")

        self.DB_url = self.conf.get("DB_url")
        self.DB_user = self.conf.get("DB_user")
        self.DB_passwd = self.conf.get("DB_passwd")
        self.crawler_DB = self.conf.get("crawler_DB")
        self.crawler_table = self.conf.get("crawler_table")
        self.db = DBHandler(self.crawler_DB, self.DB_user,self.DB_passwd,self.DB_url)
        self.db.connect()
        # MySQL columns are case-insensitive for search operations (unlike Oracle);
        # that default can be changed at table-creation time by marking a column
        # as BINARY, as done for the URL columns below.
        self.db.update("CREATE TABLE IF NOT EXISTS `" + self.crawler_table + "` ("
                       " `page_id` int(20) NOT NULL AUTO_INCREMENT,"
                       " `page_url` varchar(200) BINARY NOT NULL,"
                       " `domain_name` varchar(100) BINARY NOT NULL,"
                       " `sublinks` text,"
                       " `title` varchar(1024),"
                       " `normal_content` text,"
                       " `emphasized_content` text,"
                       " `keywords` varchar(1024),"
                       " `description` varchar(1024),"
                       " `text` longtext,"
                       " `PR_score` double default 0.0,"
                       " `ad_NR` int default 0,"
                       " `tag` varchar(20) default null,"
                       #" `classify_attribute_1` ...
                       #" `classify_attribute_2` ...
                       " PRIMARY KEY (`page_id`),"
                       " INDEX (`page_url`)"
                       ")CHARSET=UTF8, ENGINE=InnoDB" )
        self.db.update("truncate table " + self.crawler_table)

        # holds all the links to be sent back to the manager
        self._result_dict = {}
        # holds all the links received from the manager
        self._buffer = []
        self.result_sender = NetworkHandler(self.left_ip, self.left_port)
        self.links_requester = NetworkHandler(self.left_ip, self.left_port)
        self.focusing = True  # whether the crawler should do focused crawling


    def get_links(self):
        """ used to get urls from manager.
        we use a buffer, so that we can get 50 links from manager,
        and then return 10 links with call to self.get_links() one by one.
        To do this, for example, user can adjust the 'concurrent_crawl_NR'
        setting in 'conf/crawler.conf' to 10 and 'links_to_crawler_NR' to 50.

        Note that we don't have to set any timeount here,
        because, after all, crawler have to get some links from
        manager side before it can continue """

        # if there are not enough links in the buffer
        if len(self._buffer) < self._buffer_size_threshold:
            try:
                # The manager returns links together with a flag
                # (self.focusing) that tells the crawler whether it
                # should still be doing focused crawling.
                (self.focusing, links) = self.links_requester.request()
                self.logger.info("links_requester succeed request()")
                if not links:
                    #return whatever is in self._buffer
                    tmp = self._buffer
                    self._buffer = []
                    return tmp
                else:
                    self._buffer.extend(links)
                    #make sure that we don't exceed the limit
                    nsent = (self._crawl_NR if self._crawl_NR <= len(self._buffer)
                                            else len(self._buffer))
                    tmp = []
                    for _ in range(nsent):
                        tmp.append(self._buffer.pop())
                    return tmp
            except Exception:
                raise
        else:
            #make sure that we don't exceed the limit
            nsent = (self._crawl_NR if self._crawl_NR <= len(self._buffer)
                                    else len(self._buffer))
            #we have enough links, so just return
            tmp = []
            for _ in range(nsent):
                tmp.append(self._buffer.pop())
            return tmp

    @staticmethod
    def req(url, **kwargs):
        page = requests.get(url, **kwargs)
        trytime = 1
        while trytime < _exceeded_try and page.status_code != 200:
            page = requests.get(url, **kwargs)
            time.sleep(_pause_interval)
            trytime = trytime + 1
        return page


    def get_web(self, resolved_url):
        """used to grab a web information and return a Response object."""

        #Fake the User-Agent as a well-known crawler (Googlebot here; Baidu Spider or
        #YoudaoBot would also work), but this may easily be detected due to IP mismatch.
        #NOTE: According to RFC 7230, HTTP header names are case-INsensitive
        headers={
                'Accept':'text/plain, text/html', #want only text
                #"accept-encoding":"gzip, deflate, sdch",
                #"accept-language":"en-US,en;q=0.8",
                #"Cache-Control":"max-age=0",
                #"Cookie":"timezone=480; I2KBRCK=1; cookiePolicy=accept",
                #"Host":"www.tandfonline.com",
                #"Proxy-Connection":"keep-alive",
                #"Referer":"https://www.tandfonline.com",
                #"Upgrade-Insecure-Requests":"1",
                #"User-agent":"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.94 Safari/537.36",
                "User-agent":"Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",
                }

        try:
            response = Crawler.req(resolved_url, headers=headers, timeout=self.crawling_timeout)
            self.logger.info("Get response[%d]: [%s]" % (response.status_code, resolved_url))
            #check whether we got a plain-text response
            #note that keys in `response.headers` are case-insensitive
            if 'content-type' in response.headers:
                if 'text/' not in response.headers['content-type']:
                    return None
            if (response.status_code == requests.codes.ok): #200
                return response
            else:
                return None
        except Exception as e:
            self.logger.info("Fail to fetch page. Exception: %s, url:[%s]" % (str(e), resolved_url))
            return None

    def run(self):
        """ main routine of crawler class
            @urls: used to hold the raw urls got from the left.  """

        while (True):
            try:
                urls = self.get_links()
            except Exception as e:
                self.logger.info("Cannot get urls. crawler sleep for 10 seconds.\n"
                        "\tException:[%s]\n" % str(e))
                time.sleep(10) #wait a little bit to see if things get better
                continue
            if not urls:
                self.logger.info("Empty urls from dns_resolver. Crawler will loop")
                time.sleep(10)
                continue

            #####DEBUG
            self.logger.info("GOT urls from manager: [")
            for u in urls:
                self.logger.info("\t" + u)
            self.logger.info("  ]")
            #####END

            # Crawl the links
            with ThreadPoolExecutor(self.thread_pool_size) as pool:
                responses = pool.map(self.get_web, urls)

            #Process the responses: pair the extracted inner links with their source link, then send them back
            for index, resp in enumerate(responses):
                origin = urls[index]
                if not resp:
                    self._result_dict[origin] = "FAIL"
                else:
                    try:
                        # Note that resp is already of type 'text/html'
                        # Note that resp.text returns a unicode string
                        outer_links, inner_links = self.extract_link(origin, resp.text)
                    except Exception as e:
                        self.logger.info(("Exception when extract_links:[%s],"
                                "url:[%s]\n") % (str(e), origin))
                        continue
                    self.logger.info("Finished extract_links()")
                    outer_links = set(outer_links)
                    inner_links = set(inner_links)
                    if self.focusing:
                        self.logger.info("crawler is FOCUSING now.\n")
                        self._result_dict[origin] = self.trim_url_suffix(inner_links)
                    else:
                        self._result_dict[origin] = self.trim_url_suffix(outer_links)

                    # resp.content returns a 'bytes' object
                    try:
                        self.dump_content(resp, origin)
                    except Exception as e:
                        self.logger.info(("Exception when dump_content():[%s],"
                                "url:[%s]") % (str(e), origin))
                        traceback.print_exc()
                        continue
                    self.logger.info("Finished dump_content()")

            data = pickle.dumps(self._result_dict)
            try:
                self.result_sender.send(data)
                self.logger.info("successfully sent back to the left\n")
            except Exception as e:
                self.logger.info(("Fail sending to manager:[%s]\n"
                                    "unsent links:[%s]\n") % (str(e), str(self._result_dict)))
            finally:
                self._result_dict = {}

    def extract_link(self, origin_url, html):
        """This function is used for extract all links from the web.
           It would distinct the inner links and outer links.
           For inner links, it should add the header and
           delete the tag#, remove .css and javascript link"""
        html_text = etree.HTML(html)
        links = html_text.xpath('//*/a/@href') #all the links, relative or absolute

        origin_url = origin_url.strip()
        # get the url domain to define the website
        protocal, domain = self.get_protocal_domain(origin_url)

        #useless file pattern (something like xxx.jpg, xxx.mp4, xxx.css, xxx.pdf, etc)
        uf_pattern = re.compile(r'\.jpg$|\.png|\.xml|\.mp4|\.mp3|\.css|\.pdf|\.svg|\.gz|\.zip|\.rar|\.exe|\.tar')
        #unsupported protocol pattern (something like ftp://, sftp://, thunders://, etc)
        up_pattern = re.compile(r'^.{0,10}:')
        #we only support the http/https protocols
        sp_pattern = re.compile(r'http://|https://')

        outer_link_lists = []
        inner_link_lists = []
        for element in links:
            element = element.strip()
            if re.match(sp_pattern, element):  # begin with http/https
                #first check if this matches the useless-file pattern
                if re.findall(uf_pattern, element):
                    continue
                #check whether it's an outer link or an inner link
                test_protocal, test_domain = self.get_protocal_domain(element)
                if test_domain != domain:
                    outer_link_lists.append(element.strip())
                else:
                    inner_link_lists.append(element.strip())
            elif re.findall(uf_pattern, element):
                continue
            elif re.findall(up_pattern, element):
                continue
            else:
                if element.startswith('/'):
                    link = protocal + '://' + domain + element
                else:
                    link = protocal + '://' + domain + '/' + element
                inner_link_lists.append(link.strip())

        return (outer_link_lists, inner_link_lists)
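
    # Aside: the manual "protocal + '://' + domain + element" joins above can also be
    # written with urllib.parse.urljoin, which resolves an href against the page URL
    # (relative to the page's path, as a browser would). A sketch, not the original code:
    #     from urllib.parse import urljoin
    #     link = urljoin(origin_url, element)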

    def trim_url_suffix(self, urls):
        """
        trim those urls with suffix `#xxxxx' or `?xxxx'
        NOTE that ALL URLS PASSED IN MUST BE VALID!!!
        """
        def _trim_url_suffix(url): #make it reusable
            #tag link pattern
            return url.split('#')[0].split('?')[0]

        return list(map(_trim_url_suffix, urls))

    def get_protocal_domain(self, url):
        """ return protocal and domain """
        protocal, rest = urllib.parse.splittype(url)
        domain, url_suffix = urllib.parse.splithost(rest)
        return (protocal, domain)
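
    # Note: urllib.parse.splittype/splithost are legacy helpers that have emitted
    # DeprecationWarning since Python 3.8; urlsplit yields the same two pieces.
    # A sketch of the equivalent:
    #     from urllib.parse import urlsplit
    #     parts = urlsplit(url)
    #     return (parts.scheme, parts.netloc)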

    def dump_content(self, resp, origin_url):
        """ requests cannot detect web page encoding automatically(F**K!).
            response.encoding is from the html reponse header. If we want to
            convert all the content we want to utf8, we have to use `get_encodings_from_content; """
        # resp.text is unicode (type 'str')
        # resp.content is raw bytes (type 'bytes')
        text = resp.text
        # requests takes the page encoding from the HTTP response header; if the
        # header provides no encoding info, requests defaults to 'ISO-8859-1'.
        # But most of the time we can detect the encoding from the HTML page
        # content itself.
        if(resp.encoding == 'ISO-8859-1' and not 'ISO-8859-1' in resp.headers.get('Content-Type', '')):
            try:
                real_encoding = requests.utils.get_encodings_from_content(resp.text)[0]
                text = resp.content.decode(real_encoding, 'ignore')
            except Exception:
                text = resp.content.decode('utf-8', 'ignore')
        html_tree = etree.HTML(text)
        kws = html_tree.xpath('//*/meta[re:test(@name, "[Kk]eywords?")]/@content', namespaces={'re': "http://exslt.org/regular-expressions"})
        descs = html_tree.xpath('//*/meta[re:test(@name, "[Dd]escription")]/@content', namespaces={'re': "http://exslt.org/regular-expressions"})
        kw = kws[0] if kws else ""
        desc = descs[0] if descs else ""
        kw = kw.encode('utf-8', 'ignore')
        desc = desc.encode('utf-8', 'ignore')

        try:
            real_encoding = requests.utils.get_encodings_from_content(resp.text)[0]
            utf8_text = resp.content.decode(real_encoding, "ignore").encode('utf-8')
        except Exception:
            utf8_text = resp.content

        # requests may follow redirects. For example
        #       http://bbs.people.com.cn/
        # gets redirected to
        #       http://bbs1.people.com.cn/
        # so if we stored resp.url as the crawled URL we would end up with
        # duplicate URLs in the database. That is why we use the origin_url
        # passed in (bbs, NOT bbs1).
        #
        #page_url = bytes(resp.url, 'utf-8')
        page_url = origin_url

        _, domain_name = self.get_protocal_domain(resp.url)
        domain_name = bytes(domain_name, 'utf-8')
        titles = re.findall(rb'<title>(.*?)</title>', utf8_text)
        title = titles[0] if titles else b''

        self.db.update("INSERT INTO " + self.crawler_table + "(`page_url`, `domain_name`,"
                "`title`, `text`, `keywords`, `description`) "
                "VALUES (%s, %s, %s, %s, %s, %s);",
                (page_url, domain_name, title, utf8_text, kw, desc))
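
A note on the encoding handling in dump_content: requests also exposes resp.apparent_encoding, which guesses the charset from the response body and can stand in for the manual get_encodings_from_content fallback. A sketch of that variant (decode_response is a hypothetical helper, not part of the crawler):

def decode_response(resp):
    # Trust the declared encoding unless it is the ISO-8859-1 default that requests
    # falls back to, in which case use the charset detected from the body itself.
    encoding = resp.encoding
    if not encoding or encoding.lower() == 'iso-8859-1':
        encoding = resp.apparent_encoding or 'utf-8'
    return resp.content.decode(encoding, 'ignore')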
Example 9
    def setup(self, resetting=False):
        self.log.info("Started class setup.")

        # Set constant for shots per turn
        self.SHOTS_PER_TURN = 3

        # Create network handler
        if not resetting:
            self.nethandler = NetworkHandler(self,
                                             machineType="host",
                                             resetting=resetting)
            self.log.info("Created network handler.")
        else:
            self.log.info("Resetting, skipped network handler setup...")

        # Create GPIO handler
        if not resetting:
            self.gpioHandler = GPIOHandler()
            self.log.info("Created GPIO handler.")
        else:
            self.log.info("Resetting, skipped gpio handler setup...")

        # Establish connection to other machine
        if not resetting:
            if self.nethandler.connect():
                self.log.info("Connection established to other machine.")
            else:
                self.log.error(
                    "Failed to establish connection to other machine.")
        else:
            self.log.info("Resetting, skipped nethandler connection step...")

        # Create game loop thread
        self.gameLoopThread = threading.Thread(target=self.loop)
        self.log.info("Created game loop thread.")

        # Start GPIO handler thread
        if not resetting:
            self.gpioHandler.buttonThread.start()
            self.log.info("Started buttonThread.")
        else:
            self.gpioHandler.restartThreads()

        # Create and display start frame
        self.startFrame = StartFrame(self.window)
        #self.window.update_idletasks()
        #self.window.update()

        self.log.info("Created start frame. Waiting for start button...")
        while self.gpioHandler.startFlag == False:
            self.startFrame.animate()
            self.window.update_idletasks()
            self.window.update()

        self.log.info("Start button pressed. Continuing with game setup...")
        self.startFrame.destroy()
        self.log.info("Destroied StartFrame.")

        # Create enemy and friendly frames
        self.enemyFrame = EnemyFrame(self, self.window)
        self.friendlyFrame = FriendlyFrame(self.window)
        self.log.info("Created enemy and friendly frames.")

        # Start threads
        if not resetting:
            self.log.info("Starting game loop thread...")
            self.gameLoopThread.start()
            self.log.info("Starting listen loop thread...")
            self.nethandler.listenThread.start()
        else:
            self.log.info("Resetting, starting threads...")
            self.restartThreads()
            self.nethandler.restartThreads()

        # Update window to show new frames
        self.window.update_idletasks()
        self.window.update()
Example 10
class Game:
    def __init__(self, window):
        # Create class logger
        self.log = logging.getLogger("Game")
        self.log.setLevel(logging.INFO)

        # Create flags
        self.exitFlag = False
        self.hostReadyFlag = False
        self.clientReadyFlag = False

        # Add Tkinter protocol to stop all parallel threads when closed
        window.protocol("WM_DELETE_WINDOW", self.closeGame)

        # Store window in class member variable
        self.window = window

        # Start setup
        self.setup()

    # Sets up the game. Handles creating the connection between
    #   the machines and the game loop setup
    def setup(self, resetting=False):
        self.log.info("Started class setup.")

        # Set constant for shots per turn
        self.SHOTS_PER_TURN = 3

        # Create network handler
        if not resetting:
            self.nethandler = NetworkHandler(self,
                                             machineType="host",
                                             resetting=resetting)
            self.log.info("Created network handler.")
        else:
            self.log.info("Resetting, skipped network handler setup...")

        # Create GPIO handler
        if not resetting:
            self.gpioHandler = GPIOHandler()
            self.log.info("Created GPIO handler.")
        else:
            self.log.info("Resetting, skipped gpio handler setup...")

        # Establish connection to other machine
        if not resetting:
            if self.nethandler.connect():
                self.log.info("Connection established to other machine.")
            else:
                self.log.error(
                    "Failed to establish connection to other machine.")
        else:
            self.log.info("Resetting, skipped nethandler connection step...")

        # Create game loop thread
        self.gameLoopThread = threading.Thread(target=self.loop)
        self.log.info("Created game loop thread.")

        # Start GPIO handler thread
        if not resetting:
            self.gpioHandler.buttonThread.start()
            self.log.info("Started buttonThread.")
        else:
            self.gpioHandler.restartThreads()

        # Create and display start frame
        self.startFrame = StartFrame(self.window)
        #self.window.update_idletasks()
        #self.window.update()

        self.log.info("Created start frame. Waiting for start button...")
        while self.gpioHandler.startFlag == False:
            self.startFrame.animate()
            self.window.update_idletasks()
            self.window.update()

        self.log.info("Start button pressed. Continuing with game setup...")
        self.startFrame.destroy()
        self.log.info("Destroied StartFrame.")

        # Create enemy and friendly frames
        self.enemyFrame = EnemyFrame(self, self.window)
        self.friendlyFrame = FriendlyFrame(self.window)
        self.log.info("Created enemy and friendly frames.")

        # Start threads
        if not resetting:
            self.log.info("Starting game loop thread...")
            self.gameLoopThread.start()
            self.log.info("Starting listen loop thread...")
            self.nethandler.listenThread.start()
        else:
            self.log.info("Resetting, starting threads...")
            self.restartThreads()
            self.nethandler.restartThreads()

        # Update window to show new frames
        self.window.update_idletasks()
        self.window.update()

    # Restarts the class's threads
    def restartThreads(self):
        self.gameLoopThread = threading.Thread(target=self.loop)
        self.gameLoopThread.start()

    # Kills all threads by activating exit flags
    def closeGame(self):
        self.log.info("Executed closeGame()")
        self.exitFlag = True
        self.nethandler.exitFlag = True
        self.gpioHandler.exitFlag = True
        self.nethandler.osock.close()
        self.nethandler.isock.close()
        if self.nethandler.machineType == 'host':
            self.nethandler.oconnection.close()
            self.nethandler.iconnection.close()

    # Sends the desired shot locations to the other machine when it is the player's turn
    def shoot(self, desiredShots):
        # Convert desiredShots from a list of coordinate pairs to a data string
        datastr = ''
        for shot in desiredShots:
            datastr += f'{shot[0]},{shot[1]};'
        # Remove the trailing ';' and add '|' terminator
        datastr = datastr[0:len(datastr) - 1] + '|'

    # Processes game input received from network manager
    def process(self, datastr):
        self.log.info("Processing data...")
        complex_cmd = datastr.split('|')
        for data in complex_cmd:
            if data != '':
                self.__process(data)

    # Processes the received data from the other machine
    def __process(self, data):
        if data == "READY_UP":
            if self.nethandler.machineType == "host":
                print("ready up client")
                self.clientReadyFlag = True
            else:
                print("ready up host")
                self.hostReadyFlag = True
        elif data == "LOSS":
            self.exitFlag = True
            self.gpioHandler.writeToLCD("ENEMY DESTROIED!")
            self.gpioHandler.writeToLCD("    YOU WIN!", 2)
            sleep(1)
            self.endGame()
        elif data == "FORFEIT":
            self.exitFlag = True
            self.gpioHandler.writeToLCD("OPPONENT FORFEIT")
            self.gpioHandler.writeToLCD("    YOU WIN!", 2)
            sleep(1)
            self.endGame()
        elif data[0:3] == "SR:":
            data = data.replace("SR:", "")
            self.showShots(data)
        else:
            print("coords and stuffs")
            data = data.replace("READY_UP", "")
            self.sendShotResults(self.checkHits(data))

    # Sends the shot statuses to the other machine
    def sendShotResults(self, hitmiss):
        data = 'SR:'
        # Convert true false list to data str
        for val in hitmiss:
            if val == True:
                data += '1;'
            else:
                data += '0;'
        # Remove trailing ';' and add '|' terminator
        data = data[0:len(data) - 1] + '|'
        self.nethandler.strsend(data)

    # Checks the friendlyFrame board for a hit and updates the image
    def checkHits(self, data):
        self.log.info("Checking hits...")
        hitmiss = []
        for coordstr in data.split(";"):
            print(coordstr)
            temp = coordstr.split(',')
            x = int(temp[0])
            y = int(temp[1])
            print(self.friendlyFrame.shipMap[y][x])
            if self.friendlyFrame.shipMap[y][x] != '-':
                print('hit')
                hitmiss.append(True)
                self.friendlyFrame.hitCell(x, y)
            else:
                hitmiss.append(False)
                self.friendlyFrame.missCell(x, y)
        self.log.info("Set hits if there were any.")
        return hitmiss

    # Updates the enemyFrame to show if player's shots were a hit or miss
    def showShots(self, data):
        shotCounter = 0
        for status in data.split(';'):
            coord = self.previousDesiredShots[shotCounter]
            if status == '1':
                self.enemyFrame.updateCell(coord[0], coord[1], 1)
            else:
                self.enemyFrame.updateCell(coord[0], coord[1], 0)
            shotCounter += 1

    # Checks if there is a win and acts accordingly
    def checkWin(self):
        counter = 0
        for row in self.friendlyFrame.shipMap:
            for cell in row:
                if cell == 'x':
                    counter += 1
        # If every ship part has been hit (the standard Battleship fleet is 5+4+3+3+2 = 17 cells)
        if counter == 17:
            self.nethandler.strsend("LOSS|")
            self.gpioHandler.writeToLCD("NO SHIPS REMAIN!")
            self.gpioHandler.writeToLCD("    YOU LOSE", 2)
            sleep(1)
            self.endGame()

    # Used to end the game without causing thread deadlock
    def endGame(self):
        self.log.info("Ending game...")
        self.gpioHandler.writeToLCD('Ending Game...')
        self.log.info("Press restart button to continue.")
        self.gpioHandler.writeToLCD("Press RESTART to")
        self.gpioHandler.writeToLCD("   play again!", 2)
        while self.gpioHandler.resetFlag == False:
            pass
        self.__reset()

    def __reset(self):
        self.gpioHandler.writeToLCD("RESTARTING...")
        self.exitFlag = True
        self.nethandler.exitFlag = True
        self.gpioHandler.exitFlag = True
        self.log.info("Sleeping for 2 sec...")
        sleep(2)
        self.enemyFrame.destroy()
        self.friendlyFrame.destroy()
        sleep(2)

        self.log.info("Resetting flags")
        self.exitFlag = False
        self.nethandler.exitFlag = False
        self.gpioHandler.exitFlag = False

        self.log.info("Starting game setup again")
        self.setup(resetting=True)

    # Checks the exitFlag and exits
    def checkExitFlag(self):
        if self.exitFlag:
            self.log.critical("Exit flag has been raised.")
            exit(0)

    # Checks the GPIO forfeit flag's status and acts accordingly
    def checkForfeit(self):
        if self.gpioHandler.forfeitFlag == True:
            self.nethandler.strsend("FORFEIT|")
            self.gpioHandler.writeToLCD("YOU FORFEIT")
            sleep(1)
            self.endGame()
            exit(0)

    # General game loop
    def loop(self):
        while True:
            sleep(0.05)
            # Check for win or lose condition
            self.checkWin()
            # Check for exit command
            self.checkExitFlag()
            # Check for forfeit button
            self.checkForfeit()

            # Enable player input for shots
            self.enemyFrame.enableInput()
            self.log.info("Enabled enemyFrame player input.")
            self.gpioHandler.writeToLCD("SELECT TARGETS")

            # Wait until player has chosen their 3 shots
            while self.enemyFrame.ready == False:
                # Check for exit command
                self.checkExitFlag()
                self.checkWin()
                self.checkForfeit()
                # Update shot status leds
                self.gpioHandler.updateShotLEDs(
                    len(self.enemyFrame.desiredShots))
            self.gpioHandler.updateShotLEDs(len(self.enemyFrame.desiredShots))

            self.previousDesiredShots = self.enemyFrame.desiredShots

            # Parse desired shots into transmittable data str
            datastr = ''
            for shot in self.enemyFrame.desiredShots:
                datastr += f'{shot[0]},{shot[1]};'
            # Remove trailing ';' from datastr and add '|' terminator
            datastr = datastr[:len(datastr) - 1] + '|'
            self.log.info("Created datastring: " + datastr)

            # Reset EnemyFrame's desiredShots list
            self.enemyFrame.desiredShots = []
            self.log.info("Reset desiredShots.")
            # Disable player input for shots
            self.enemyFrame.disableInput()
            self.enemyFrame.ready = False
            self.log.info("Disabled player input.")

            self.gpioHandler.writeToLCD(" CANNONS ARMED")
            self.gpioHandler.writeToLCD("Fire when ready", 2)
            self.log.info("Waiting on shoot button press...")
            # while the user has not pressed the shoot button
            while self.gpioHandler.shootFlag == False:
                self.checkExitFlag()
                self.checkWin()
                self.checkForfeit()

            # Reset shot status LEDs
            self.gpioHandler.updateShotLEDs(0)

            # Set machine ready flag
            if self.nethandler.machineType == "host":
                self.hostReadyFlag = True
            else:
                self.clientReadyFlag = True
            self.log.info("Set current machine's ready flag.")

            self.nethandler.strsend("READY_UP|")

            #Wait for other machine to be ready
            self.gpioHandler.writeToLCD("Waiting on other")
            self.gpioHandler.writeToLCD("    Player...", 2)
            self.log.info("Waiting for other machine's ready flag...")
            if self.nethandler.machineType == "host":
                while self.clientReadyFlag == False:
                    self.checkWin()
                    self.checkExitFlag()
                    self.checkForfeit()
            else:
                while self.hostReadyFlag == False:
                    self.checkWin()
                    self.checkExitFlag()
                    self.checkForfeit()
            self.log.info("Received ready flag from other machine")

            # Reset ready flags
            self.clientReadyFlag = False
            self.hostReadyFlag = False

            # Send coords to other machine
            self.nethandler.strsend(datastr)
            self.log.info("Sent coord data string to other machine.")

            # Activate warning LEDs
            self.gpioHandler.displayWarning()
            self.gpioHandler.writeToLCD("INCOMING FIRE!")
            sleep(1)
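
The two machines in this example talk in plain strings: semicolon-separated coordinates terminated by '|' (e.g. '3,4;5,6;7,8|'), shot results prefixed with 'SR:', and bare commands such as 'READY_UP|'. A pair of helpers for the coordinate part of that format (hypothetical names, shown only to make the framing explicit):

def encode_shots(shots):
    # [(3, 4), (5, 6)] -> '3,4;5,6|'
    return ';'.join(f'{x},{y}' for x, y in shots) + '|'

def decode_shots(datastr):
    # '3,4;5,6|' -> [(3, 4), (5, 6)]
    body = datastr.rstrip('|')
    return [tuple(int(v) for v in pair.split(',')) for pair in body.split(';') if pair]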
Example 11
class Window:
    """
    This class handles all display aspects of Jum.py.
    """

    # tk root
    root = Tk()

    # menu bar
    menu_bar = Menu()

    # file sub-menu in the menu bar
    menu_file = Menu(tearoff=False)

    # connections sub-menu in the menu bar
    menu_connections = Menu(tearoff=False)

    # frames for UX
    top_frame = Frame(root)
    bottom_frame = Frame(root)
    files = Frame(top_frame)

    location = Frame(files)
    radio_frame = Frame(files)
    directory = Label(location)
    back = Button(location)

    # functional frames
    code = CodeFrame(top_frame)
    terminal = Text(bottom_frame)

    # other variables
    current_file_name = StringVar()
    current_file = None
    old_text = ""

    # the workspace used by the program
    workspace: Workspace = None

    def __init__(self):
        self.net_hand = NetworkHandler(self.parse_message)
        #self.cursor_thread_run = True
        #self.cursor_thread = Thread(target=self.track_cursor)
        #self.cursor_thread.setDaemon(True)
        #self.u2_pos = None
        self.names = {}
        self.cursor_colors = ['red', 'green', 'blue', 'yellow', 'cyan']

        self.autosave_thread = Thread(target=self.autosave_thread)
        self.autosave_thread.setDaemon(True)

        self.current_terminal_buffer_column = 0
        self.current_terminal_buffer_line = 0
        self.log = logging.getLogger('jumpy')

        self.mac = hex(uuid.getnode())
        self.is_host = False
        self.have_perms = False

        self.mac_name = dict()

        self.workspace = Workspace()

        self.create()

    def create(self) -> None:
        """
        Creates the window.
        """

        self.root.title("jum.py")
        self.root.bind('<Key>', self.handle_event)
        self.root.bind('<Button-1>', self.handle_event)

        # menu bar
        self.menu_bar.add_cascade(label='File', menu=self.menu_file)
        self.menu_bar.add_cascade(label='Connections',
                                  menu=self.menu_connections)

        # file sub-menu
        self.menu_file.add_command(label="Open", command=self.open_folder)
        self.menu_file.add_command(label="Save", command=self.save_file)
        self.menu_file.add_command(label="Help", command=self.open_help)

        # connections sub-menu
        # self.menu_connections.add_command(label='Connect', command=self.net_hand.establish_connection)

        def create():
            if self.workspace.is_active:
                val = simpledialog.askstring("Lobby name",
                                             "Please name your lobby")
                username = simpledialog.askstring("Prompt",
                                                  "Please input a username")
                self.mac_name.update({self.mac: username})
                self.net_hand.join_lobby(val)
                self.is_host = True
                self.have_perms = True
                self.net_hand.establish_connection()
                self.back.config(state='disabled')
            else:
                messagebox.showerror("jumpy", "no active workspace")

        def join():
            self.workspace.use_temp_workspace()
            self.open_folder(self.workspace.directory)
            self.code.text.config(state='disabled')
            val = simpledialog.askstring(
                "Lobby name", "Please input the lobby you want to join.")
            username = simpledialog.askstring("Prompt",
                                              "Please input a username")
            self.mac_name.update({self.mac: username})
            self.net_hand.join_lobby(val)
            self.net_hand.establish_connection()
            self.is_host = False
            self.have_perms = False
            dprj = DataPacketRequestJoin()
            dprj.set_name(self.mac_name.get(self.mac))
            self.net_hand.send_packet(dprj)

        def disconnect():
            self.net_hand.close_lobby()
            self.back.config(state='normal')

        self.menu_connections.add_command(label='Disconnect',
                                          command=disconnect)
        self.menu_connections.add_command(label='Create lobby', command=create)
        self.menu_connections.add_command(label='Join lobby', command=join)

        # add menubar to root
        self.root.config(menu=self.menu_bar)

        # terminal default
        self.terminal.insert("1.0", "Console:\n>>>")
        self.current_terminal_buffer_column = 3
        self.current_terminal_buffer_line = 2

        #  text default
        self.old_text = self.code.text.get("1.0", END)

        self.directory.config(width=20, text="Current Folder:\nNone")
        self.back.config(text="cd ..\\", command=self.previous_dir)

        # visual effects
        self.files.config(width=200, bg='light grey')
        self.terminal.config(height=10, borderwidth=5)

        # visual packs
        self.root.geometry("900x600")

        self.top_frame.pack(side="top", fill='both', expand=True)
        self.bottom_frame.pack(side="bottom", fill='both', expand=True)

        self.files.pack(side="left", fill='both')
        self.location.pack(side="top", fill='x')
        self.directory.pack(side="left", fill='x', expand=True)
        self.back.pack(side="right", fill='x', expand=True)

        self.code.pack(side="right", fill='both', expand=True)
        self.terminal.pack(fill='both', expand=True)

    def show(self) -> None:
        """
        Shows the window.
        """
        # self.autosave_thread.start() # TODO: fix for better placing
        #self.cursor_thread.start()
        self.root.mainloop()

    def previous_dir(self):
        if self.workspace.directory != "C:/" and self.workspace.directory:
            split = self.workspace.directory.split("/")
            new_dir = "/".join(split[0:-1])
            if (new_dir == "C:"):
                new_dir += "/"
            self.open_folder(new_dir)

    def open_help(self):
        webbrowser.open_new(
            "https://docs.google.com/document/d/13AHTV3BVfS3ELmaW2cqfzgJ9YmOeBkkkMqViyU-0WDM/edit?usp=sharing"
        )

    # TODO for folders with a lot of files, add a scrollbar
    def open_folder(self, folder=None):
        if self.net_hand.is_connected:
            self.back.config(state='disabled')
        else:
            self.back.config(state='normal')
        location = ""
        if folder:
            location = folder
        else:
            location = filedialog.askdirectory()

        if location != "":
            #clear text and delete current radio buttons

            self.workspace.open_directory(location)

            # clear text and delete current radio buttons
            self.code.text.delete("1.0", END)

            # folder = os.listdir(location)
            # for item in folder:
            #     item_path = location+ "/" + item
            #     # condition so that folders that start with "." are not displayed
            #     if os.path.isfile(item_path) or not item.startswith("."):
            #         Radiobutton(self.radio_frame, text = item, variable=self.current_file_name, command=self.open_item, value=item_path, indicator=0).pack(fill = 'x', ipady = 0)
            split = str(location).split("/")
            index = -1
            folder_name = split[index]
            while folder_name == "":
                index -= 1
                folder_name = split[index]
            self.directory.config(text="Current Folder:\n" + folder_name)
            # clear text and delete current radio buttons
            self.code.text.delete("1.0", END)
            self.radio_frame.destroy()
            self.radio_frame = Frame(self.files,
                                     width=self.files.cget("width"))
            self.radio_frame.pack(fill="both", expand=True)
            self.options = FilesFrame(self.radio_frame, window=self)
            self.options.populate(self.workspace)
            self.reset_terminal()

            # starts cursor tracking thread
            # TODO: uncomment

    # TODO add functionality to clicking on folders (change current folder to that folder, have a back button to go to original folder) (chad doesn't think this is needed anymore)
    def open_item(self):
        if os.path.isfile(self.current_file_name.get()):
            self.code.text.delete("1.0", END)
            file = open(self.current_file_name.get(), "r")
            self.current_file = file
            try:
                self.code.text.insert(1.0, file.read())
                self.syntax_highlighting()
                self.old_text = self.code.text.get("1.0", END)
            except:
                self.code.text.insert(1.0, "Can not interperate this file")
            file.close()
        else:
            self.open_folder(self.current_file_name.get())
            name = self.current_file_name.get().split("/")[-1]
            self.directory.config(text="Current Folder:\n" + name)

    def save_file(self) -> None:
        f = filedialog.asksaveasfilename(defaultextension=".py")
        to_save_file = open(f, 'w')
        to_save_file.write(self.code.text.get("1.0", END))
        to_save_file.close()

    def update_text(self, action: Action, position: int, character: str):
        self.log.debug(
            'updating text with action: \'{}\', position: \'{}\', character: \'{}\''
            .format(action, position, repr(character)))
        text_current = self.code.text.get("1.0", END)
        text_new = text_current[1:position +
                                1] + character + text_current[position + 1:]
        self.log.debug(
            f"current text:{repr(text_current)} \n updated text {repr(text_new)}"
        )
        self.code.text.delete("1.0", END)
        self.code.text.insert("1.0", text_new)

        # n = 1
        # if action == Action.ADD:
        #     # TODO: fix#
        #     #
        #     text_new = character
        #     if text_new == "\n":
        #         n+=1
        #     self.log.debug("%d.%d"%(n,position))
        #     #self.text.insert("%d.%d"%(n,position), text_new)
        # elif action == Action.REMOVE:
        #     # TODO: implement
        #     pass

    def set_text(self, new_text: str):
        """
        Sets the text on the Text object directly.
        Author: Chad
        Args: new_text: string
        Returns: 
        """
        self.code.text.delete("1.0", END)
        self.code.text.insert("1.0", new_text)

    def handle_event(self, event):
        """
        Interpret keypresses on the local machine and send them off to be processed as
        a data packet. Keeps track of one-edit lag.
        TODO: Don't interpret every keypress as something to be sent, e.g. don't send _alt_
        Authors: Chad, Ben
        Args: event: str unused?
        Returns:
        Interactions: sends DataPacketDocumentEdit
        """
        # if self.net_hand.is_connected:
        #     new_text = self.code.text.get("1.0", END)
        #     packet = DataPacketDocumentEdit(old_text=self.old_text, new_text=new_text)
        #     if packet.character == '' or new_text == self.old_text:
        #         return
        #     else:
        #         self.net_hand.send_packet(packet)
        # self.syntax_highlighting()
        # self.old_text = self.code.text.get("1.0", END)
        if event.widget == self.terminal:
            # handle terminal event

            cursor_line, cursor_column = [
                int(x) for x in self.terminal.index(INSERT).split('.')
            ]

            if event.char == '\r':
                command = self.terminal.get(
                    str(self.current_terminal_buffer_line) + "." +
                    str(self.current_terminal_buffer_column),
                    END).strip("\n ").split(" ")
                print(command)
                if command[0] != "":
                    if self.workspace.directory:
                        os.chdir(self.workspace.directory)
                        if "cd" in command:
                            if not self.net_hand.is_connected:
                                if len(command) >= 2:
                                    try:
                                        os.chdir(self.workspace.directory +
                                                 "/" +
                                                 " ".join(command[1::]).strip(
                                                     '\'\"'))
                                        self.workspace.open_directory(
                                            os.getcwd().replace("\\", "/"))
                                        self.open_folder(
                                            self.workspace.directory)
                                        return
                                    except:
                                        self.current_terminal_buffer_line += 1
                                        self.terminal.insert(
                                            END, "'" + " ".join(
                                                command[1::]).strip('\'\"') +
                                            "' does not exist as a subdirectory\n"
                                        )
                                else:
                                    os.chdir("C:/")
                                    self.workspace.open_directory(os.getcwd())
                                    self.open_folder("C:/")
                                    return
                            else:
                                self.terminal.insert(
                                    END,
                                    "Can not change directories while in workspace.\n"
                                )
                                self.current_terminal_buffer_line += 1
                        else:
                            error = self.run_command(" ".join(command))
                            if error:
                                self.terminal.insert(END, error)
                                self.current_terminal_buffer_line += 1
                    else:
                        self.terminal.insert(
                            END,
                            "Open a directory before using the console.\n")
                        self.current_terminal_buffer_line += 1

                if self.workspace.directory:
                    self.terminal.insert(END, self.workspace.directory + ">")
                    self.current_terminal_buffer_column = len(
                        self.workspace.directory) + 1
                else:
                    self.terminal.insert(END, ">>>")
                self.terminal.see(END)
                self.current_terminal_buffer_line += 1
                return
            if event.char == '\x03':
                self.reset_terminal()
            if cursor_column < self.current_terminal_buffer_column or cursor_line < self.current_terminal_buffer_line:
                if event.char == '\x08':
                    self.terminal.insert(END, ">")
                self.terminal.mark_set(
                    "insert", "%d.%d" % (self.current_terminal_buffer_line,
                                         self.current_terminal_buffer_column))
        elif event.widget == self.code.text:
            # handle text event

            if self.net_hand.is_connected and self.current_file_name.get(
            ) != "None":
                to_send = DataPacketDocumentEdit()
                to_send.set_document(
                    self.current_file_name.get().split('/')[-1])
                to_send.set_text(self.code.text.get("1.0", END))
                self.net_hand.send_packet(to_send)

                # send a DataPacketCursorUpdate
                position = self.code.text.index(INSERT)
                dpcu = DataPacketCursorUpdate()
                dpcu.set_position(position)
                dpcu.set_document(self.current_file_name.get().split('/')[-1])
                self.net_hand.send_packet(dpcu)

            self.syntax_highlighting()

            # TODO: Chad thinks that this sleep is the answer to the hash mismatch
            sleep(0.1)

    def syntax_highlighting(self, lang='python'):
        """
        Highlights key elements of syntax with a color as defined in the
        language's SyntaxHandler. Only 'python' is currently implemented,
        but more can easily be added in the future. A standalone sketch of
        this tag-based approach appears after this example.
        Author: Ben
        Args: lang: string, which language to use
        Returns: None

        TODO: fix so keywords inside another keyword aren't highlighted
        TODO: make it so that it doesn't trigger after every character
        TODO: run on a separate thread at an interval or trigger (perhaps on spacebar? would reduce work)
        """
        for tag in self.code.text.tag_names():
            self.code.text.tag_delete(tag)
        if lang == 'python':
            SyntaxHandler = Syntax()
        else:
            # only Python has a SyntaxHandler at the moment
            return

        syntax_dict = SyntaxHandler.get_color_dict()
        for kw in SyntaxHandler.get_keywords():
            idx = '1.0'
            color = syntax_dict[kw]
            self.code.text.tag_config(color, foreground=color)
            # search_term = rf'\y{kw}\y'
            while idx:
                idx = self.code.text.search('\\y' + kw + '\\y',
                                            idx,
                                            nocase=1,
                                            stopindex=END,
                                            regexp=True)
                if idx:
                    # self.log.debug(idx)
                    nums = idx.split('.')
                    nums = [int(x) for x in nums]
                    # self.log.debug(f"{left} { right}")
                    lastidx = '%s+%dc' % (idx, len(kw))
                    self.code.text.tag_add(color, idx, lastidx)
                    idx = lastidx
        self.code.text.tag_config("comments", foreground="olive drab")
        idx = '1.0'
        while idx != '':
            idx = self.code.text.search('#', idx, nocase=1, stopindex=END)
            #self.log.debug(idx)
            if idx == '':
                #self.log.debug(idx)
                continue
            #self.log.debug(idx)
            endl = self.code.text.search('\n', idx, stopindex=END)
            if endl == "":
                endl = END
            self.code.text.tag_add("comments", idx, endl)
            idx = endl

    def reset_terminal(self):
        self.terminal.delete("1.0", END)
        self.terminal.insert(END, "Console:\n")
        if self.workspace.directory:
            self.terminal.insert(END, self.workspace.directory + ">")
            self.current_terminal_buffer_column = len(
                self.workspace.directory) + 1
        else:
            self.terminal.insert(END, ">>>")
            self.current_terminal_buffer_column = 3
        self.current_terminal_buffer_line = 2

    def run_command(self, command):
        try:
            process = subprocess.Popen(shlex.split(command),
                                       stdout=subprocess.PIPE)
            while True:
                output = process.stdout.readline()
                if not output and process.poll() is not None:
                    break
                if output:
                    self.terminal.insert(END,
                                         output.strip() + bytes("\n", "utf-8"))
                    self.current_terminal_buffer_line += 1
            self.terminal.insert(END, "\n")
            self.current_terminal_buffer_line += 1
        except Exception:
            return "'" + command + "' is not a valid command\n"

    def parse_message(self, packet_str: str):
        data_dict = json.loads(packet_str)
        packet_name = data_dict.get('packet-name')
        if data_dict.get('mac-addr') == self.mac:
            self.log.debug('received packet from self, ignoring...')
        else:
            self.log.debug('Received a \'{}\''.format(packet_name))
            print(data_dict)

            if packet_name == 'DataPacket':
                self.log.debug('Received a DataPacket')

            elif packet_name == 'DataPacketDocumentEdit':
                self.log.debug('Received a DataPacketDocumentEdit')

                cursor_index = self.code.text.index(INSERT)

                packet: DataPacketDocumentEdit = DataPacketDocumentEdit()
                packet.parse_json(packet_str)
                self.workspace.apply_data_packet_document_edit(packet)
                current_doc = self.current_file_name.get().split('/')[-1]
                if packet.get_document() == current_doc:
                    self.code.text.delete("1.0", END)
                    self.code.text.insert(END, packet.get_text())
                    self.syntax_highlighting()

                self.code.text.mark_set(INSERT, cursor_index)

            elif packet_name == 'DataPacketCursorUpdate':
                u2_pos = data_dict.get('position')
                name = data_dict.get('mac-addr')
                self.cursor_update(u2_pos, str(name))

            elif packet_name == 'DataPacketRequestJoin':
                packet: DataPacketRequestJoin = DataPacketRequestJoin()
                packet.parse_json(packet_str)
                if self.is_host:
                    result = messagebox.askyesno(
                        "jumpy request",
                        "Allow \'{}\' to join the lobby?".format(
                            data_dict.get(DataPacketRequestJoin.KEY_NAME)))
                    dprr = DataPacketRequestResponse()
                    dprr.set_target_mac(packet.get_mac_addr())
                    dprr.set_can_join(result)
                    self.net_hand.send_packet(dprr)
                    if result:
                        sleep(3)
                        name_broadcast = DataPacketNameBroadcast()
                        name_broadcast.set_name(self.mac_name.get(self.mac))
                        self.net_hand.send_packet(name_broadcast)
                        to_send = self.workspace.get_save_dump()
                        for packet in to_send:
                            self.net_hand.send_packet(packet)

            elif packet_name == 'DataPacketRequestResponse':
                packet: DataPacketRequestResponse = DataPacketRequestResponse()
                packet.parse_json(packet_str)
                if packet.get_target_mac() == DataPacket.get_mac_addr_static():
                    self.log.debug('Received a DataPacketRequestResponse')
                    can_join = packet.get_can_join()

                    # todo: fix
                    if can_join:
                        self.log.debug('allowed into the lobby')
                        self.workspace.use_temp_workspace()
                        self.have_perms = True
                        messagebox.showinfo(
                            "jumpy", "You have been accepted into the lobby!")
                    else:
                        self.log.debug('rejected from the lobby')
                        self.have_perms = False
                        messagebox.showerror(
                            "jumpy",
                            "You have NOT been accepted into the lobby...")
                        self.net_hand.close_connection()

                name_broadcast = DataPacketNameBroadcast()
                name_broadcast.set_name(self.mac_name.get(self.mac))
                self.net_hand.send_packet(name_broadcast)

            elif packet_name == 'DataPacketSaveDump':
                packet: DataPacketSaveDump = DataPacketSaveDump()
                packet.parse_json(packet_str)
                self.workspace.apply_data_packet_save_dump(packet)
                if self.workspace.new_file_added:
                    if len(self.workspace.files) == packet.get_workspace_size(
                    ):
                        self.log.debug(
                            'received whole workspace, setting code.text state to normal'
                        )
                        self.code.text.config(state='normal')
                        self.open_folder(self.workspace.directory)

            elif packet_name == 'DataPacketSaveRequest':
                to_send = self.workspace.get_save_dump_from_document(
                    data_dict.get('document'))
                self.net_hand.send_packet(to_send)

            elif packet_name == 'DataPacketNameBroadcast':
                packet = DataPacketNameBroadcast()
                packet.parse_json(packet_str)
                self.log.debug('mac_name updating {} to {}'.format(
                    packet.get_mac_addr(), packet.get_name()))
                self.mac_name.update(
                    {packet.get_mac_addr(): packet.get_name()})

            else:
                self.log.warning(
                    'Unknown packet type: \'{}\''.format(packet_name))
                return False

    def get_words(self):
        """
        Gets all words (definition: separated by a space character) in the
        Text object.
        Author: Ben
        Args:
        Returns: words: list, a list of words in the Text object
        """
        words = self.code.text.get("1.0", END).split(" ")
        return words

    def cursor_update(self, pos, name):
        if name not in self.names:
            self.names[name] = self.cursor_colors.pop()
        color = self.names[name]
        print(color)
        self.code.text.tag_remove(color, "1.0", END)
        self.code.text.tag_config(color, background=color)
        pos_int = [int(x) for x in pos.split(".")]
        end_pos = f'{pos_int[0]}.{pos_int[1]+1}'
        self.code.text.tag_add(color, pos, end_pos)

    def autosave_thread(self):
        while True:
            sleep(10)
            if self.is_host:
                self.log.debug("autosaving...")
                self.autosave()
            else:
                pass

    def autosave(self):
        if self.is_host:
            to_send = self.workspace.get_save_dump()
            for packet in to_send:
                self.net_hand.send_packet(packet)
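
The syntax_highlighting() method above leans on two Tk Text features: named tags configured with a foreground colour, and regexp search using Tcl's \y word-boundary atom. The following standalone sketch shows the same technique on its own; the highlight_keywords name and the tiny keyword/colour table are illustrative stand-ins for jumpy's Syntax handler, which is not part of this example.

from tkinter import Tk, Text, END

def highlight_keywords(text, colors):
    """Tag every whole-word occurrence of each keyword with its colour."""
    for kw, color in colors.items():
        text.tag_config(color, foreground=color)
        idx = '1.0'
        while True:
            # '\y' is Tcl's word-boundary atom, so 'def' will not match inside 'undefined'
            idx = text.search(r'\y' + kw + r'\y', idx, stopindex=END, regexp=True)
            if not idx:
                break
            last = '%s+%dc' % (idx, len(kw))
            text.tag_add(color, idx, last)
            idx = last

if __name__ == '__main__':
    root = Tk()
    widget = Text(root)
    widget.insert(END, "def answer():\n    return 42\n")
    widget.pack()
    highlight_keywords(widget, {'def': 'blue', 'return': 'purple'})
    root.mainloop()
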
Esempio n. 12
0
class Crawler(object):
    """ The crawler.
        Multiple threads would be started in method run() """
    def __init__(self):
        """ Initialization """

        self.conf = ConfReader("crawler.conf", default_conf)
        self.logger = Logger()
        self.db = None

        self.thread_pool_size = self.conf.get("thread_pool_size")
        self.left_ip = self.conf.get("manager_ip")
        self.left_port = self.conf.get("manager_port")
        self._buffer_size_threshold = self.conf.get("buffer_size_threshold")
        # how many links we should return when the caller call self.get_links()
        self._crawl_NR = self.conf.get("concurrent_crawl_NR")
        self.my_open = uopen if self.conf.get(
            "buffer_output") == "no" else open
        self.content_path = self.conf.get("content_path")
        self.crawling_timeout = self.conf.get("crawling_timeout")

        self.DB_url = self.conf.get("DB_url")
        self.DB_user = self.conf.get("DB_user")
        self.DB_passwd = self.conf.get("DB_passwd")
        self.crawler_DB = self.conf.get("crawler_DB")
        self.crawler_table = self.conf.get("crawler_table")
        self.db = DBHandler(self.crawler_DB, self.DB_user, self.DB_passwd,
                            self.DB_url)
        self.db.connect()
        # Mysql columns are case insensitive (contrary to Oracle) for search operations
        # and the default behavior can be changed while creating the table by specifying
        # the "BINARY"
        self.db.update("CREATE TABLE IF NOT EXISTS `" + self.crawler_table +
                       "` ("
                       " `page_id` int(20) NOT NULL AUTO_INCREMENT,"
                       " `page_url` varchar(200) BINARY NOT NULL,"
                       " `domain_name` varchar(100) BINARY NOT NULL,"
                       " `inner_links` text,"
                       " `outer_links` text,"
                       " `title` varchar(1024),"
                       " `normal_content` text,"
                       " `emphasized_content` text,"
                       " `keywords` varchar(1024),"
                       " `description` varchar(1024),"
                       " `text` longtext,"
                       " `PR_score` double default 0.0,"
                       " `ad_NR` int default 0,"
                       " `tag1` varchar(20) default null,"
                       " `tag2` varchar(20) default null,"
                       " `tag3` varchar(20) default null,"
                       " INDEX (`page_url`),"
                       " PRIMARY KEY (`page_id`)"
                       ")CHARSET=UTF8, ENGINE=InnoDB")
        self.db.update("truncate table " + self.crawler_table)

        # hold all the links to be sent back to manager
        self._result_dict = {}
        # used to hold all the links which are got from manager
        self._buffer = []
        self.result_sender = NetworkHandler(self.left_ip, self.left_port)
        self.links_requester = NetworkHandler(self.left_ip, self.left_port)
        self.focusing = True  # whether or not the crawling should do focus-crawling

    def get_links(self):
        """ used to get urls from manager.
        we use a buffer, so that we can get 50 links from manager,
        and then return 10 links with call to self.get_links() one by one.
        To do this, for example, user can adjust the 'concurrent_crawl_NR'
        setting in 'conf/crawler.conf' to 10 and 'links_to_crawler_NR' to 50.

        Note that we don't have to set any timeount here,
        because, after all, crawler have to get some links from
        manager side before it can continue """

        # if there are not enough links in the buffer
        if len(self._buffer) < self._buffer_size_threshold:
            try:
                # the manager returns links together with a
                # message (self.focusing), which tells the crawler whether it
                # should still be focused-crawling or not
                (self.focusing, links) = self.links_requester.request()
                self.logger.info("links_requester succeed request()")
                if not links:
                    # return whatever in self._buffer
                    tmp = self._buffer
                    self._buffer = []
                    return tmp
                else:
                    self._buffer.extend(links)
                    # make sure that we don't exceed the limit
                    nsent = (self._crawl_NR
                             if self._crawl_NR <= len(self._buffer) else len(
                                 self._buffer))
                    tmp = []
                    for _ in range(nsent):
                        tmp.append(self._buffer.pop())
                    return tmp
            except Exception:
                raise
        else:
            # make sure that we don't exceed the limit
            nsent = (self._crawl_NR if self._crawl_NR <= len(self._buffer) else
                     len(self._buffer))
            # we have enough links, so just return
            tmp = []
            for _ in range(nsent):
                tmp.append(self._buffer.pop())
            return tmp

    @staticmethod
    def req(url, **kwargs):
        page = requests.get(url, **kwargs)
        trytime = 1
        while trytime < _exceeded_try and page.status_code != 200:
            page = requests.get(url, **kwargs)
            time.sleep(_pause_interval)
            trytime = trytime + 1
        return page

    def get_web(self, resolved_url):
        """used to grab a web information and return a Response object."""

        # pretend to be a well-known spider. Here we fake Googlebot; BaiduSpider
        # or YoudaoBot would work too, but faking may be easily detected due to ip-mismatch
        # NOTE: According to RFC 7230, HTTP header names are case-INsensitive
        headers = {
            'Accept':
            'text/plain, text/html',  #want only text
            # "accept-encoding":"gzip, deflate, sdch",
            # "accept-language":"en-US,en;q=0.8",
            # "Cache-Control":"max-age=0",
            # "Cookie":"timezone=480; I2KBRCK=1; cookiePolicy=accept",
            # "Host":"www.tandfonline.com",
            # "Proxy-Connection":"keep-alive",
            # "Referer":"https://www.tandfonline.com",
            # "Upgrade-Insecure-Requests":"1",
            # "User-agent":"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.94 Safari/537.36",
            "User-agent":
            "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",
        }

        try:
            response = Crawler.req(resolved_url,
                                   headers=headers,
                                   timeout=self.crawling_timeout)
            self.logger.info("Get response[%d]: [%s]" %
                             (response.status_code, resolved_url))
            # check whether we get a plain text response
            # note that key in `response.headers` is case insensitive
            if 'content-type' in response.headers:
                if 'text/' not in response.headers['content-type']:
                    return None
            if response.status_code == requests.codes.ok:  # 200
                return response
            else:
                return None
        except Exception as e:
            self.logger.info("Fail to fetch page. Exception: %s, url:[%s]" %
                             (str(e), resolved_url))
            return None

    def run(self):
        """ main routine of crawler class
            @urls: used to hold the raw urls got from the left.  """

        while True:
            try:
                urls = self.get_links()
            except Exception as e:
                self.logger.info(
                    "Cannot get urls. Crawler sleep for 10 seconds.\n"
                    "\tException:[%s]\n" % str(e))
                time.sleep(
                    10)  # wait a little bit to see if thing would get better
                continue
            if not urls:
                self.logger.info(
                    "Empty urls from dns_resolver. Crawler will loop")
                time.sleep(10)
                continue

            #####DEBUG
            self.logger.info("GOT urls from manager: [")
            for u in urls:
                self.logger.info("\t" + u)
            self.logger.info("  ]")
            #####END

            # crawl the links concurrently
            with ThreadPoolExecutor(self.thread_pool_size) as pool:
                responses = pool.map(self.get_web, urls)

            # process the responses: pair the extracted links with their origin link and collect them for sending back
            for index, resp in enumerate(responses):
                origin = urls[index]
                if not resp:
                    self._result_dict[origin] = "FAIL"
                else:
                    try:
                        # Note that resp is already of type 'text/html'
                        # Note that resp.text return unicode string
                        outer_links, inner_links = self.extract_link(
                            origin, resp.text)
                    except Exception as e:
                        self.logger.info(("Exception when extract_links:[%s],"
                                          "url:[%s]\n") % (str(e), origin))
                        continue
                    self.logger.info("Finished extract_links()")
                    outer_links = set(outer_links)
                    inner_links = set(inner_links)
                    if self.focusing:
                        self.logger.info("crawler is FOCUSING now.\n")
                        self._result_dict[origin] = self.trim_url_suffix(
                            inner_links)
                    else:
                        self._result_dict[origin] = self.trim_url_suffix(
                            outer_links)

                    # resp.content return 'bytes' object
                    try:
                        self.dump_content(resp, origin, inner_links,
                                          outer_links)
                    except Exception as e:
                        self.logger.info(("Exception when dump_content():[%s],"
                                          "url:[%s]") % (str(e), origin))
                        traceback.print_exc()
                        continue
                    self.logger.info("Finished dump_content()")

            data = pickle.dumps(self._result_dict)
            try:
                self.result_sender.send(data)
                self.logger.info("successfully sent back to the left\n")
            except Exception as e:
                self.logger.info(
                    ("Fail sending to manager:[%s]\n"
                     "unsent links:[%s]\n") % (str(e), str(self._result_dict)))
            finally:
                self._result_dict = {}

    def extract_link(self, origin_url, html):
        """This function is used for extract all links from the web.
           It would distinct the inner links and outer links.
           For inner links, it should add the header and
           delete the tag#, remove .css and javascript link"""
        html_text = etree.HTML(html)
        links = html_text.xpath(
            '//*/a/@href')  #all the links, relative or absolute

        origin_url = origin_url.strip()
        # get the url domain to define the website
        protocal, domain = self.get_protocal_domain(origin_url)

        # useless file pattern (something like xxx.jpg, xxx.mp4, xxx.css, xxx.pdf, etc)
        uf_pattern = re.compile(
            r'\.jpg$|\.png|\.xml|\.mp4|\.mp3|\.css|\.pdf|\.svg|\.gz|\.zip|\.rar|\.exe|\.tar'
        )
        # unsupported protocol pattern (something like ftp://, sftp://, thunder://, etc)
        up_pattern = re.compile(r'^.{0,10}:')
        # we only support the http/https protocols
        sp_pattern = re.compile(r'http://|https://')

        outer_link_lists = []
        inner_link_lists = []
        for element in links:
            element = element.strip()
            if re.match(sp_pattern, element):  # begin with http/https
                # first check if this match those useless pattern
                if re.findall(uf_pattern, element):
                    continue
                # check whether it's outer link or inner link
                test_protocal, test_domain = self.get_protocal_domain(element)
                if test_domain != domain:
                    outer_link_lists.append(element.strip())
                else:
                    inner_link_lists.append(element.strip())
            elif re.findall(uf_pattern, element):
                continue
            elif re.findall(up_pattern, element):
                continue
            else:
                if element.startswith('/'):
                    link = protocal + '://' + domain + element
                else:
                    link = protocal + '://' + domain + '/' + element
                inner_link_lists.append(link.strip())

        return outer_link_lists, inner_link_lists

    def trim_url_suffix(self, urls):
        """
        trim those urls with suffix `#xxxxx' or `?xxxx'
        NOTE that ALL URLS PASSED IN MUST BE VALID!!!
        """
        def _trim_url_suffix(url):  # make it reusable
            # tag link pattern
            return url.split('#')[0].split('?')[0]

        return list(map(_trim_url_suffix, urls))

    def get_protocal_domain(self, url):
        """ return protocal and domain """
        protocal, rest = urllib.parse.splittype(url)
        domain, url_suffix = urllib.parse.splithost(rest)
        return protocal, domain

    def dump_content(self, resp, origin_url, inner_links, outer_links):
        """ requests cannot detect web page encoding automatically(F**K!).
            response.encoding is from the html reponse header. If we want to
            convert all the content we want to utf8, we have to use `get_encodings_from_content; """
        # resp.text is unicode (type 'str')
        # resp.content is raw bytes (type 'bytes')
        text = resp.text
        # requests get html page encoding from HTTP Response header, if the
        # Response header provide no info about encoding, then requests would
        # default to 'ISO-8859-1'. But most of the time we can detect the
        # encoding in html page content
        if resp.encoding == 'ISO-8859-1' and 'ISO-8859-1' not in resp.headers.get(
                'Content-Type', ''):
            try:
                real_encoding = requests.utils.get_encodings_from_content(
                    resp.text)[0]
                text = resp.content.decode(real_encoding, 'ignore')
            except Exception:
                text = resp.content.decode('utf-8', 'ignore')
        html_tree = etree.HTML(text)
        kws = html_tree.xpath(
            '//*/meta[re:test(@name, "[Kk]eywords?")]/@content',
            namespaces={'re': "http://exslt.org/regular-expressions"})
        descs = html_tree.xpath(
            '//*/meta[re:test(@name, "[Dd]escription")]/@content',
            namespaces={'re': "http://exslt.org/regular-expressions"})
        kw = kws[0] if kws else ""
        desc = descs[0] if descs else ""
        kw = kw.encode('utf-8', 'ignore')
        desc = desc.encode('utf-8', 'ignore')

        try:
            real_encoding = requests.utils.get_encodings_from_content(
                resp.text)[0]
            utf8_text = resp.content.decode(real_encoding,
                                            "ignore").encode('utf-8')
        except Exception:
            utf8_text = resp.content

        # requests follows redirects. For example
        #       http://bbs.people.com.cn/
        # may be redirected to
        #       http://bbs1.people.com.cn/
        # so if we used resp.url as the crawled url, we would end up with
        # duplicate urls in the database. Therefore we keep the origin_url
        # that was passed in (bbs, NOT bbs1).
        #
        # page_url = bytes(resp.url, 'utf-8')
        page_url = origin_url

        _, domain_name = self.get_protocal_domain(resp.url)
        domain_name = bytes(domain_name, 'utf-8')
        titles = re.findall(rb'<title>(.*?)</title>', utf8_text)
        title = titles[0] if titles else b''
        inner_links = ";".join(inner_links)
        outer_links = ";".join(outer_links)

        self.db.update(
            "INSERT INTO " + self.crawler_table + "(`page_url`, `domain_name`,"
            "`inner_links`,`outer_links`,`title`, `text`, `keywords`, `description`) "
            "VALUES (%s, %s, %s, %s, %s, %s, %s, %s);",
            (page_url, domain_name, inner_links, outer_links, title, utf8_text,
             kw, desc))
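
The buffering in Crawler.get_links() above is a batched pull: keep a local buffer, refill it from the manager whenever it drops below a threshold, and hand out at most a fixed number of links per call. Here is a minimal, manager-free sketch of that pattern; the BufferedLinkSource name and the refill callable are illustrative, standing in for the NetworkHandler request.

class BufferedLinkSource:
    """Hand out at most `batch_size` links per call, refilling from `refill`."""

    def __init__(self, refill, batch_size=10, threshold=20):
        self._refill = refill          # callable returning a list of links
        self._batch_size = batch_size  # plays the role of concurrent_crawl_NR
        self._threshold = threshold    # plays the role of buffer_size_threshold
        self._buffer = []

    def get_links(self):
        if len(self._buffer) < self._threshold:
            self._buffer.extend(self._refill())
        nsent = min(self._batch_size, len(self._buffer))
        return [self._buffer.pop() for _ in range(nsent)]

# usage with a fake refill source:
# src = BufferedLinkSource(lambda: ['http://example.com/%d' % i for i in range(50)])
# batch = src.get_links()   # at most 10 links per call
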
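The decision made per href in extract_link() is: absolute http(s) links are split into inner/outer by comparing domains, relative links are made absolute against the origin, and links matching the useless-file or unsupported-protocol patterns are skipped. Below is a compact sketch of that decision, using urllib.parse.urljoin/urlparse instead of the splittype/splithost helpers; the classify_link name and the simplified extension pattern are illustrative.

import re
from urllib.parse import urlparse, urljoin

USELESS = re.compile(r'\.(jpg|png|xml|mp4|mp3|css|pdf|svg|gz|zip|rar|exe|tar)$')

def classify_link(origin_url, href):
    """Return ('inner' | 'outer' | None, absolute_url)."""
    absolute = urljoin(origin_url, href.strip())   # resolves relative hrefs
    parsed = urlparse(absolute)
    if parsed.scheme not in ('http', 'https') or USELESS.search(parsed.path):
        return None, absolute                      # skipped, like the uf/up patterns
    if parsed.netloc == urlparse(origin_url).netloc:
        return 'inner', absolute
    return 'outer', absolute

# classify_link('http://example.com/a/', 'b.html')          -> ('inner', 'http://example.com/a/b.html')
# classify_link('http://example.com/', 'http://other.org/') -> ('outer', 'http://other.org/')
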
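The encoding fallback in dump_content() is worth isolating: when requests falls back to its ISO-8859-1 default (i.e. the Content-Type header carried no charset), trust the charset declared inside the HTML instead, via requests.utils.get_encodings_from_content. A standalone sketch of that fallback; the decode_body name is illustrative.

import requests

def decode_body(resp):
    """Return the page body as str, preferring the charset declared in the HTML."""
    if resp.encoding == 'ISO-8859-1' and 'ISO-8859-1' not in resp.headers.get('Content-Type', ''):
        declared = requests.utils.get_encodings_from_content(resp.text)
        if declared:
            return resp.content.decode(declared[0], 'ignore')
        return resp.content.decode('utf-8', 'ignore')
    return resp.text

# resp = requests.get('http://example.com')
# text = decode_body(resp)
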
Esempio n. 13
0
class DNSResolver:
    """ Class to help resolve url's IP address """
    #mmap_file = uopen("/tmp/DNSResolver.mmap","w+")
    #mmap_file.truncate(5*1024*1024) #500M space for the file
    #_mmap = mmap.mmap(mmap_file.fileno(),0) #0 means that whole file
    #DNS_cache = _mmap   #use _mmap as the same way as list in python
    DNS_cache = []   #we are not using memory mapping here for simplicity
    cache_lock = threading.Lock()  #lock to access DNS_cache
    log_file = uopen("DNSResolver.log", "w+")
    log_lock = threading.Lock()

    def __init__(self, ip, port):
        """ @_source_ip     : ip of the manager
            @_source_port   : port of the manager
            @links_requester: helper instance used to get links from the manager
            @_buffer        : buffer that hold some {url=>resolved_url...}
            @_buffer_size_threshold: if size of buffer is less than this, then we should get some
                                     from manager
            @_nsent         : number of links that we return at a time """
        self._source_ip = ip
        self._source_port = port
        self.links_requester = NetworkHandler(self._source_ip, self._source_port)
        self._buffer = {} #like {url1=>ip1, url2=>ip2, url3=>ip3...}
        self._buffer_size_threshold = 10
        self._nsent = self._buffer_size_threshold #how many links we should return when the caller
                                                  #call self.get_resolved_url_packet()

    def resolve_url(self, links):
        """ resolved the `links` passed in """
        resolved_links = []
        with DNSResolver.cache_lock:
            for link in links:
                protocol, rest = urllib.parse.splittype(link)
                host, url_suffix = urllib.parse.splithost(rest)
                resolved = None
                for dictionary in DNSResolver.DNS_cache:
                    if host == list(dictionary.keys())[0]:
                        ip_address = list(dictionary.values())[0]
                        resolved = protocol + "://" + ip_address + url_suffix
                        break
                if not resolved:
                    try:
                        print("Querying DNS server...")
                        ip_address = socket.gethostbyname(host)
                        print("Get ip[%s] from DNS server" % str(ip_address))
                        DNSResolver.DNS_cache.append({host:ip_address})
                        resolved = protocol + "://" + ip_address + url_suffix
                    except Exception as e:
                        with DNSResolver.log_lock:
                            DNSResolver.log_file.write(
                                "Exception when querying DNS server:[%s]\n" % str(e))
                        resolved = None
                resolved_links.append(resolved)
        return resolved_links

    def get_resolved_url_packet(self):
        """ used to get url=>resolved_url from Manager side.
            Note that we don't have to set any timeout here,
            because, after all, crawler have to get some links from
            Manager side before it can continue"""
        if len(self._buffer) < self._buffer_size_threshold:
            try:
                links = self.links_requester.request()
                if not links:
                    #return whatever in self._buffer
                    tmp = self._buffer
                    self._buffer = {}
                    return tmp
                else:
                    #resolved_links = self.resolve_url(links)
                    #self._buffer.update(zip(links, resolved_links))
                    self._buffer.update(zip(links, links))
                    #make sure that we don't exceed the limit
                    nsent = (self._nsent if self._nsent <= len(self._buffer)
                                            else len(self._buffer))
                    tmp = {}
                    pairs = list(self._buffer.items())
                    to_be_del = []
                    for index in range(nsent):
                        url, resolved = pairs[index]
                        tmp[url] = resolved
                        to_be_del.append(url)
                    for item in to_be_del:
                        del self._buffer[item]
                    return tmp
            except Exception:
                raise
        else:
            #make sure that we don't exceed the limit
            nsent = (self._nsent if self._nsent <= len(self._buffer)
                                    else len(self._buffer))
            #we have enough links, so just return
            tmp = {}
            pairs = list(self._buffer.items())
            to_be_del = []
            for index in range(nsent):
                url, resolved = pairs[index]
                tmp[url] = resolved
                to_be_del.append(url)
            for item in to_be_del:
                del self._buffer[item]
            return tmp
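
DNSResolver.resolve_url() above is essentially a locked, process-wide cache in front of socket.gethostbyname. A stripped-down sketch of the same idea with a plain dict instead of the list-of-dicts cache; the cached_gethostbyname name is illustrative.

import socket
import threading

_dns_cache = {}
_dns_lock = threading.Lock()

def cached_gethostbyname(host):
    """Resolve `host` once and remember the answer for later calls."""
    with _dns_lock:
        if host not in _dns_cache:
            # may raise socket.gaierror if the name cannot be resolved
            _dns_cache[host] = socket.gethostbyname(host)
        return _dns_cache[host]

# cached_gethostbyname('www.example.com')   # e.g. '93.184.216.34'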