def test_url(self):
    """Round-trip check: encodeurl must embed the POST args marker and
    parseurl must recover the original url and data unchanged."""
    data = {'a': 1, 'b': 2, 'c': 3}
    url = 'http://www.google.com'
    encoded = DefaultScraper.encodeurl('POST', url, data)
    self.assertTrue('<args>' in encoded)
    decoded_url, decoded_data = DefaultScraper.parseurl(encoded)
    self.assertEqual(url, decoded_url)
    self.assertEqual(data, decoded_data)
def _parsepage(self, page, oriurl):
    """Parse the result-count fragment in *page*.

    On the first results page (summary starting with '1-') this builds a
    Phase from the query encoded in *oriurl*, queues the remaining pages
    on the spider, and returns the Phase; otherwise it returns None.
    """
    # Strip the leading markup up to '> ' and the trailing 5 characters,
    # then drop thousands separators so int() can parse the total.
    summary = page[page.index('> ') + 2:-5].strip()
    if ',' in summary:
        summary = summary.replace(',', '')
    if not summary.startswith('1-'):
        # Not the first page of results: nothing to schedule.
        return
    url, data = DefaultScraper.parseurl(oriurl)
    keyword = data['KEYWORDS']
    # Summary looks like '1-N of TOTAL'; take the grand total.
    total = int(summary.split(' of ')[1])
    phase = Phase(data['fromDate'], data['toDate'], keyword, total)
    followups = []
    for page_no in xrange(2, phase.pages + 1):
        data['page_no'] = page_no
        followups.append(DefaultScraper.encodeurl('POST', url, data))
    self._spider.addtask(followups)
    return phase
def _parsekeyword(self, oriurl):
    """Return the KEYWORDS value encoded in *oriurl*."""
    data = DefaultScraper.parseurl(oriurl)[1]
    return data['KEYWORDS']