예제 #1
0
파일: spider.py 프로젝트: shymonk/tools
    def handle_crawl_start(self, url, start_time):
        """Record through the REST API that crawling of ``url`` has started.

        The crawl record for ``url`` is fetched first. When at least one
        record comes back, the first one is updated (its ``id`` is reused);
        otherwise a new record is created.
        """
        payload = {
            'url': url,
            'size': 0,
            'status': 'downloading',
            'desc': None,
            'start': start_time,
            'end': None,
        }

        existing = rest.api_crawl(api='get', target_url=url)
        if len(existing) == 0:
            # no record for this url yet, so register a new one
            rest.api_crawl(api='create', target_url=url, post_data=payload)
            return

        # a record already exists: refresh its crawling status in place
        payload['id'] = existing[0].get('id')
        rest.api_crawl(api='update', target_url=url, post_data=payload)
예제 #2
0
파일: spider.py 프로젝트: clicknull/tools-2
    def handle_crawl_start(self, url, start_time):
        """Tell the REST API that a crawl of ``url`` is now in progress.

        An existing record for ``url`` (the first one returned by the
        lookup) is updated; if the lookup returns nothing, a record is
        created instead.
        """
        record = {
            'url': url,
            'size': 0,
            'status': 'downloading',
            'desc': None,
            'start': start_time,
            'end': None,
        }

        # look up the crawling record, then decide between update and create
        found = rest.api_crawl(api='get', target_url=url)
        has_record = len(found) > 0
        if has_record:
            # updates must carry the id of the record being modified
            record['id'] = found[0].get('id')

        action = 'update' if has_record else 'create'
        rest.api_crawl(api=action, target_url=url, post_data=record)
예제 #3
0
파일: spider.py 프로젝트: shymonk/tools
    def handle_crawl_end(self, url, start_time, end_time, result, size):
        """Record through the REST API how the crawl of ``url`` finished.

        A ``result`` equal to ``"success"`` is stored with status ``"done"``
        and no description; any other value is stored with status
        ``"error"`` and ``result`` itself as the description. The first
        existing record for ``url`` is updated, or a new one is created
        when the lookup returns nothing.
        """
        # short pause so that status updates are not sent too frequently
        time.sleep(0.5)

        status, desc = ("done", None) if result == "success" else ("error", result)

        payload = {
            'url': url,
            'size': size,
            'status': status,
            'desc': desc,
            'start': start_time,
            'end': end_time,
        }

        existing = rest.api_crawl(api='get', target_url=url)
        if len(existing) == 0:
            # no record for this url yet, so register a new one
            rest.api_crawl(api='create', target_url=url, post_data=payload)
            return

        # a record already exists: overwrite it with the final status
        payload['id'] = existing[0].get('id')
        rest.api_crawl(api='update', target_url=url, post_data=payload)
예제 #4
0
파일: spider.py 프로젝트: clicknull/tools-2
    def handle_crawl_end(self, url, start_time, end_time, result, size):
        """Report the final outcome of crawling ``url`` to the REST API.

        ``result == "success"`` maps to status ``"done"`` with no
        description; anything else maps to status ``"error"`` with
        ``result`` stored as the description. The record is updated when
        the lookup returns one, and created otherwise.
        """
        # short pause so that status updates are not sent too frequently
        time.sleep(0.5)

        status, desc = ("done", None) if result == "success" else ("error", result)

        record = {
            'url': url,
            'size': size,
            'status': status,
            'desc': desc,
            'start': start_time,
            'end': end_time,
        }

        found = rest.api_crawl(api='get', target_url=url)
        has_record = len(found) > 0
        if has_record:
            # updates must carry the id of the record being modified
            record['id'] = found[0].get('id')

        action = 'update' if has_record else 'create'
        rest.api_crawl(api=action, target_url=url, post_data=record)