def __init__(self, site):
    """Bind this instance to one site record.

    ``site`` is kept on the instance as ``self.meta`` -- the very same
    object, not a copy -- and receives a ``'db'`` entry holding whatever
    ``aituans.mongodbConnection()`` returns.  A logger obtained from
    ``aituans.initLogger("parser")`` is stored as ``self.logger``.

    When ``len(site)`` is zero the constructor does nothing, so none of
    ``self.meta``, ``self.meta['db']`` or ``self.logger`` is set.
    """
    if len(site) != 0:
        self.meta = site
        connection = aituans.mongodbConnection()
        # Written through self.meta, i.e. into the caller's own object.
        self.meta['db'] = connection
        self.logger = aituans.initLogger("parser")
#!/usr/bin/env python2.7
#coding:utf-8
'''
Created on 2011-5-13

@author: zeroq
'''
import unittest
import aituans
import os

LOGGER = aituans.initLogger("spider")


class AituansTest(unittest.TestCase):
    """Tests exercising the ``aituans`` module."""

    def testSpider(self):
        # Build one Spider for a fixed test site, start it and wait for it
        # to finish via join().  Nothing is asserted; the test only fails if
        # one of these calls raises.
        site = {"class":"test", "name":"test", "url":"http://www.groupon.cn/BeiJing/","domain":"www.groupon.cn"}
        # Directory containing this test file, passed as the second argument.
        here = os.path.abspath(os.path.dirname(__file__))
        worker = aituans.Spider(site, here)
        worker.start()
        worker.join()
        # Disabled check, kept for reference:
        #assert aituans.spiderMain() == True

    def testUpdater(self):
        # Placeholder: the only check is currently disabled.
        #assert aituans.updaterMain() == True
        pass

    def testGetSites(self):
        """
        1. Test whether the SITES to crawl can be retrieved; sites is a list.
        """
        #logger = aituans.initLogger("test")
#!/usr/bin/env python2.7
#coding:utf-8
'''
Created on 2011-5-16

@author: zeroq
'''
import aituans
import rule
import unittest

LOGGER = aituans.initLogger("parser")


class ParserTest(unittest.TestCase):
    """Tests for ``rule.ParserBase``.

    Each test builds a ``ParserBase`` from the first entry returned by
    ``aituans.getSites()``.  Checks use ``self.assertTrue`` rather than a
    bare ``assert`` so they are still evaluated when Python runs with ``-O``
    (which strips ``assert`` statements).
    """

    def testgetFiles(self):
        # getFiles() must return something truthy for the first site.
        sites = aituans.getSites()
        ps = rule.ParserBase(sites[0])
        files = ps.getFiles()
        self.assertTrue(files)

    def testGetFileContent(self):
        # getPageContentFromFile() must return something truthy for the
        # first file reported by getFiles().
        sites = aituans.getSites()
        ps = rule.ParserBase(sites[0])
        files = ps.getFiles()
        # Fail with a clear message instead of an IndexError on files[0]
        # when no files were found.
        self.assertTrue(files, "getFiles() returned no files")
        pd = ps.getPageContentFromFile(files[0])
        self.assertTrue(pd)

    def testGetAttrs(self):
        sites = aituans.getSites()