def start_requests(self):
    """Build the spider's initial requests, resuming saved work if present.

    Reads every document from the ``api.quited_url`` collection of the
    MongoDB server at localhost:27017 and then empties that collection.
    Each document is expected to carry the keys ``url``, ``type``, ``item``
    and ``need_access`` (a missing key raises ``KeyError``).  Presumably
    these documents were stored when an earlier crawl stopped -- confirm
    against the code that writes the collection.

    For each document whose ``type`` is recognised, a fresh item is
    created, initialised, optionally filled from the stored ``item`` dict,
    and attached to a request aimed at the callback for that page type.
    Documents with an unrecognised ``type`` are skipped.

    When the collection is empty, a single request for the API directory
    page is returned instead.

    Returns:
        list: the ``Request`` objects to schedule.
    """
    client = MongoClient(host="localhost", port=27017)
    try:
        collection = client.api.quited_url
        # Materialise the cursor before clearing the collection and closing
        # the connection; the documents are plain dicts afterwards.
        saved = list(collection.find())
        # NOTE(review): Collection.remove() is deprecated in PyMongo 3 and
        # gone in PyMongo 4 (delete_many({}) replaces it); kept as-is so the
        # PyMongo version this file was written for keeps working.
        collection.remove()
    finally:
        # The original never released the client, leaking the connection.
        client.close()

    if not saved:
        # Nothing to resume: start from the directory listing.
        return [Request(url="http://www.programmableweb.com/apis/directory",
                        meta={'type': 'api_page'},
                        callback=self.parse,
                        dont_filter=True)]

    # page type -> (item class, item initialiser, callback,
    #               whether 'need_access' is forwarded in meta)
    handlers = {
        'api_page': (API, api_item_init, self.parse, False),
        'followers_page': (API_Followers, api_followers_init,
                           self.api_followers_parse, True),
        'developers_page': (API_Developers, api_developers_init,
                            self.api_developers_parse, True),
        'api_summary': (API, api_item_init, self.api_summary_parse, False),
        'user_page_f': (API_Followers, api_followers_init,
                        self.user_exceed_parse, True),
        'user_page_d': (API_Developers, api_developers_init,
                        self.user_exceed_parse, True),
    }

    requests = []
    for doc in saved:
        # NOTE(review): encode() yields a byte string; fine on Python 2,
        # confirm Request accepts it if this ever runs on Python 3.
        url = doc["url"].encode("utf-8")
        page_type = doc["type"]
        stored_fields = doc["item"]
        need_access = doc["need_access"]

        handler = handlers.get(page_type)
        if handler is None:
            # Unknown page types were silently ignored by the original too.
            continue

        item_cls, init_item, callback, forwards_access = handler
        restored = item_cls()
        init_item(restored)
        if stored_fields is not None:
            item_filed_from_dict(restored, stored_fields)

        meta = {'type': page_type, 'item': restored}
        if forwards_access:
            meta['need_access'] = need_access
        requests.append(Request(url=url, callback=callback, meta=meta,
                                dont_filter=True))
    return requests
def api_summary_parse(self, response):
    """Parse an API summary page and fan out to its sub-pages.

    Takes the item from ``response.meta['item']`` (a new ``API()`` when
    absent), hands the response and the item to
    ``self.api_summary_parse_util``, then prepares one followers item and
    one developers item, each tagged with the API's ``name``.

    Yields:
        The API item first, then a request for ``<url>/followers`` and a
        request for ``<url>/developers``, each carrying its prepared item
        and page type in ``meta``.
    """
    summary = response.meta.get('item', API())
    parser = self.api_summary_parse_util
    parser.set_response(response)
    parser.parse(summary)

    # Both sub-items are built before anything is yielded, so they read
    # the name as it stands right after parsing.
    followers = API_Followers()
    api_followers_init(followers)
    followers["api_name"] = summary["name"]

    developers = API_Developers()
    api_developers_init(developers)
    developers["api_name"] = summary["name"]

    yield summary

    follow_ups = (
        ("/followers", followers, 'followers_page', self.api_followers_parse),
        ("/developers", developers, 'developers_page', self.api_developers_parse),
    )
    for suffix, sub_item, page_type, callback in follow_ups:
        yield Request(url=response.url + suffix,
                      meta={'item': sub_item, 'type': page_type},
                      callback=callback,
                      dont_filter=True)