def handle_crawl_start(self, url, start_time):
    """Record the start of a crawl for *url* via the RESTful API.

    Fetches any existing crawl record for the URL: if one is found, it
    is updated back to the 'downloading' state; otherwise a fresh
    record is created.
    """
    record = {
        'url': url,
        'size': 0,
        'status': 'downloading',
        'desc': None,
        'start': start_time,
        'end': None,
    }
    # Look up an existing record so we update rather than duplicate it.
    existing = rest.api_crawl(api='get', target_url=url)
    if existing:
        # Carry the first match's id so the API targets that record.
        record['id'] = existing[0].get('id')
        rest.api_crawl(api='update', target_url=url, post_data=record)
    else:
        rest.api_crawl(api='create', target_url=url, post_data=record)
def handle_crawl_end(self, url, start_time, end_time, result, size):
    """Record the final status of a crawl for *url* via the RESTful API.

    A *result* of "success" marks the record 'done'; any other value
    marks it 'error' and stores *result* as the description. The record
    is updated in place if one exists, otherwise created.
    """
    # Brief pause so status updates are not issued in rapid succession.
    time.sleep(0.5)
    if result == "success":
        status = "done"
        desc = None
    else:
        status = "error"
        desc = result
    record = {
        'url': url,
        'size': size,
        'status': status,
        'desc': desc,
        'start': start_time,
        'end': end_time,
    }
    # Look up an existing record so we update rather than duplicate it.
    existing = rest.api_crawl(api='get', target_url=url)
    if existing:
        # Carry the first match's id so the API targets that record.
        record['id'] = existing[0].get('id')
        rest.api_crawl(api='update', target_url=url, post_data=record)
    else:
        rest.api_crawl(api='create', target_url=url, post_data=record)