Example #1
    def work(self, thread_name, url_dao):
        """
        This method is assigned to threads.
        As long as there are items in the queue this method wil start crawling them.

        :param thread_name: The name of the current thread
        :param url_dao: an instance of UrlDAO
        :return: Nothing
        """
        try:
            while True:
                # Block until a URL is available on the queue.
                url = self.queue.get()
                url_id = HashService.num_md5(url.url_string)
                timestamp = datetime.now().strftime(
                    '%Y-%m-%dT%H:%M:%S.%f')[:-3] + "Z"
                MyLogger.log(
                    LOG.CRAWLER, "url_id=" + str(url_id) + " url=" +
                    str(url.url_string) + " @timestamp=" + timestamp)
                # Crawl the page, record it as done, then feed the URL to
                # the parser so newly found links are queued.
                Spider(url, thread_name, url_dao, self.redis).run()
                self.crawled.add(url)
                self.parser.add_link_to_queue(url)
                self.parser.start()
                self.queue.task_done()
        except BlacklistNotFoundError:
            # Drain the remaining tasks so queue.join() can return, then
            # surface the failure to the thread's caller.
            while self.queue.unfinished_tasks > 0:
                self.queue.task_done()
            raise MyThreadError
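A minimal sketch of how this worker might be attached to daemon threads (the crawler object, thread count, and thread-naming scheme below are illustrative assumptions, not taken from the source project):

    import threading

    def start_workers(crawler, url_dao, n_threads=4):
        # Each daemon thread runs the worker loop above; queue.join()
        # blocks until every queued URL has been marked task_done().
        for i in range(n_threads):
            name = "Thread-{0}".format(i)
            thread = threading.Thread(target=crawler.work,
                                      args=(name, url_dao))
            thread.daemon = True
            thread.start()
        crawler.queue.join()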
Example #2
    def __crawl_page(self, url):
        """
        This method is the main method of the spider class.
        If the layer of the url or the size of crawled is bigger than it's corresponding property the program
        will clear the queue.
        If it's not it wil start crawling the page by opening a request and getting the html.
        It'll then save the html to redis.
        After that it'll gather all links from that page and add those links to the queue.

        :param url: URL object
        :return: nothing
        """
        start_time = time.time()
        # Stop crawling once the URL is deeper than the max depth or the
        # page limit has been reached
        if url.layer > Properties.SPIDER_MAX_DEPTH \
                or len(self.crawled) > Properties.SPIDER_MAX_PAGES:
            self.deque.clear()
        else:
            try:
                # Fetch the page; only non-empty HTML is persisted and
                # mined for further links.
                request = self.__get_request(url)
                html = self.__get_html(request)
                if len(html) > 0:
                    self.__save_html_to_redis(html)
                    self.__add_links_to_queue(Spider.__gather_links(url, html))
                self.crawled.add(url)
                print(
                    self.name,
                    "is now crawling {}\n\t\t\t\t\t\t Queue {} | Crawled {} | "
                    "Layer: {} | Duration: {}".format(
                        url, len(self.deque), len(self.crawled), url.layer,
                        time.time() - start_time))
            except req.HTTPError as e:
                MyLogger.log(
                    LOG.SPIDER, "HTTP Error occurred [{0}]: {1} {2}".format(
                        str(e.code), e.filename, e.reason))
            except req.URLError as e:
                MyLogger.log(LOG.SPIDER,
                             "URL Error occurred: {0}".format(e.reason))
            except ssl.SSLError as e:
                MyLogger.log(LOG.SPIDER, "SSL Error occurred: {0}".format(e))
            except socket.timeout as e:
                MyLogger.log(LOG.SPIDER, "Timeout occurred: {0}".format(e))
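The docstring implies a driver loop that drains the deque; a hedged sketch of what that loop might look like inside the Spider class (the run method and the membership check are assumptions for illustration):

    def run(self):
        # Keep crawling while the deque has work; __crawl_page clears the
        # deque once SPIDER_MAX_DEPTH or SPIDER_MAX_PAGES is exceeded,
        # which ends this loop.
        while self.deque:
            url = self.deque.popleft()
            if url not in self.crawled:
                self.__crawl_page(url)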
Example #3
    def __get_html(self, request):
        """
        This method returns the HTML of the request.

        :param request: Request object
        :return: The HTML of the request, or an empty string on failure
        """
        html_string = ''
        try:
            response = req.urlopen(request, timeout=self.TIMEOUT_TIME)
            # Guard against responses without a Content-Type header and
            # only decode responses that actually carry HTML.
            content_type = response.getheader('Content-Type') or ''
            if 'text/html' in content_type:
                html_bytes = response.read()
                html_string = html_bytes.decode("utf-8").strip()
        except UnicodeDecodeError as e:
            MyLogger.log(LOG.SPIDER,
                         "UnicodeDecodeError occurred: {0}".format(e))
        except socket.timeout as e:
            MyLogger.log(LOG.SPIDER, "Timeout occurred: {0}".format(e))
        except ConnectionResetError as e:
            MyLogger.log(
                LOG.SPIDER, "ConnectionResetError occurred [{0}]: {1}".format(
                    str(e.errno), e.strerror))
        except ssl.CertificateError as e:
            MyLogger.log(LOG.SPIDER,
                         "SSL CertificateError: {0}".format(e.args))
        except BadStatusLine as e:
            MyLogger.log(LOG.SPIDER, "BadStatusLine: {0}".format(e.args))
        except IncompleteRead as e:
            MyLogger.log(LOG.SPIDER, "IncompleteRead: {0}".format(e.args))
        return html_string
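Note that __get_html always decodes as UTF-8. A standalone variant of the same fetch-and-decode pattern that honors the charset advertised by the server could look like this (the function name, timeout, and fallback policy are assumptions, not the project's code):

    import urllib.request as req

    def fetch_html(url, timeout=10):
        # Fetch a page and decode it with the charset from the
        # Content-Type header, falling back to UTF-8 with replacement
        # characters instead of raising UnicodeDecodeError.
        response = req.urlopen(url, timeout=timeout)
        content_type = response.getheader('Content-Type') or ''
        if 'text/html' not in content_type:
            return ''
        charset = response.headers.get_content_charset() or 'utf-8'
        return response.read().decode(charset, errors='replace').strip()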