class UrlUtilityTest(unittest.TestCase):
    """Check that UrlUtil returns a response for a URL and downloads it.

    Both tests point UrlUtil at a ``file://`` URL for a small text file that
    setUp creates in the current working directory, so no network access is
    required.
    """

    # Scratch file created in setUp and addressed through a file:// URL.
    TEST_FILE_NAME = 'urlTestFile'
    # Destination file name handed to UrlUtil.downloadImage.
    DOWNLOAD_FILE_NAME = 'dlFile.jpg'

    def _localFileUrl(self):
        """Return the file:// URL of the scratch file in the working directory."""
        return 'file://' + getcwd() + '/' + self.TEST_FILE_NAME

    def setUp(self):
        """Create the UrlUtil under test and the scratch file it will read."""
        module_logger.debug('method setUp was called')
        # Object on which the tests are performed.
        self.url = UrlUtil()
        # 'with' guarantees the handle is closed even if the write fails.
        with open(self.TEST_FILE_NAME, 'w+') as testFile:
            testFile.write('This is the file for testing URL Response\n')
        module_logger.debug('method setUp completed successfully')

    def tearDown(self):
        """Delete the scratch file created by setUp."""
        module_logger.debug('method tearDown was called')
        remove(self.TEST_FILE_NAME)
        module_logger.debug('method tearDown completed successfully')

    def testDownload(self):
        """Testing the downloading of file from url"""
        module_logger.debug('method testDownload was called')
        urlAddress = self._localFileUrl()
        try:
            self.url.downloadImage(urlAddress, self.DOWNLOAD_FILE_NAME)
            self.assertTrue(path.isfile(self.DOWNLOAD_FILE_NAME))
        finally:
            # Clean up even when the assertion fails, so a failed run does
            # not leave a stale download that would mask the next failure.
            if path.isfile(self.DOWNLOAD_FILE_NAME):
                remove(self.DOWNLOAD_FILE_NAME)
        module_logger.debug('method testDownload completed successfully')

    def testUrlResponse(self):
        """Testing to get the correct url response"""
        module_logger.debug('method testUrlResponse was called')
        urlAddress = self._localFileUrl()
        # Only truthiness of the response is checked, not its content.
        self.assertTrue(self.url.getUrlResponse(urlAddress))
        module_logger.debug('method testUrlResponse completed successfully')

    def runTest(self):
        """Run every test of this class and append the report to a file.

        The verbose report is appended to ``Test-Results.txt`` in the current
        working directory.

        Returns:
            The result object returned by ``unittest.TextTestRunner.run``.
        """
        suite = unittest.TestLoader().loadTestsFromTestCase(UrlUtilityTest)
        # 'with' closes the report file even if the runner raises.
        with open('Test-Results.txt', 'a') as filePointer:
            runner = unittest.TextTestRunner(verbosity=2, stream=filePointer)
            result = runner.run(suite)
        return result

class ScrapyExtractorTest(unittest.TestCase):
    """Testing for extraction of different kind of objects.

    Each test builds a ScrapyExtractor from a ``*.yml`` configuration file,
    runs it against the ``index.html`` page stored next to this module and
    compares the result with expected data loaded as JSON from the same
    directory.
    """

    def setUp(self):
        """Create the UrlUtil helper and resolve the fixture locations."""
        module_logger.debug('method setUp was called')
        self.urlUtil = UrlUtil()
        # __file__ can be relative (or a bare file name) when the module is
        # run directly; without abspath the fixture paths would then resolve
        # to '/<name>' and the file:// URL would be malformed.
        self.fixtureDir = os.path.dirname(os.path.abspath(__file__))
        self.urlAddress = 'file://' + self.fixtureDir + '/index.html'
        module_logger.debug('method setUp completed successfully')

    def _checkExtraction(self, configName, itemClass, expectedName,
                         initItem=True):
        """Run one extraction and compare it with the stored expected data.

        Args:
            configName: file name of the extractor configuration, located in
                the fixture directory.
            itemClass: item class to instantiate and pass to the extractor.
            expectedName: file name of the JSON file with the expected result,
                located in the fixture directory.
            initItem: when true, call ``init()`` on the new item before use.
        """
        # Initialize the extractor
        configFile = os.path.join(self.fixtureDir, configName)
        extractor = ScrapyExtractor(configFile, self.urlUtil)

        # Create the item that receives the extracted data
        item = itemClass()
        if initItem:
            item.init()

        # Create an HtmlResponse object for performing XPath operations on it
        bodyForResponse = self.urlUtil.getUrlResponse(self.urlAddress)
        response = HtmlResponse(self.urlAddress, body=bodyForResponse)

        extractedData = extractor.extract(response, item)

        # Load the correct data; 'with' closes the file deterministically.
        expectedPath = os.path.join(self.fixtureDir, expectedName)
        with open(expectedPath, 'r') as expectedFile:
            trueData = json.load(expectedFile)

        # assertEqual reports both values on failure, unlike assertTrue(a == b).
        self.assertEqual(extractedData, trueData)

    ##(i) Image extraction test
    def testImageExtract(self):
        """Testing for image extraction from given page"""
        module_logger.debug('method testImageExtract was called')
        self._checkExtraction('imageExtractionTest.yml', ImageArrayItem,
                              'correctImageData')
        module_logger.debug('method testImageExtract completed successfully')

    ##(ii) Text extraction test
    def testTextExtract(self):
        """Testing for text extraction from given page"""
        module_logger.debug('method testTextExtract was called')
        # init() is not called on the text item; the call was disabled in the
        # original test. NOTE(review): confirm whether that is intentional.
        self._checkExtraction('textExtractionTest.yml', TextArrayItem,
                              'correctTextData', initItem=False)
        module_logger.debug('method testTextExtract completed successfully')

    ##(iii) Link extraction test
    def testLinkExtract(self):
        """Testing for recursive link extraction from given page and following it"""
        module_logger.debug('method testLinkExtract was called')
        self._checkExtraction('linkExtractionTest.yml', LinkArrayItem,
                              'correctLinkData')
        module_logger.debug('method testLinkExtract completed successfully')

    def runTest(self):
        """Run every test of this class and append the report to a file.

        The verbose report is appended to ``Logger/Test-Results.txt`` under
        the parent of the current working directory.

        Returns:
            The result object returned by ``unittest.TextTestRunner.run``.
        """
        suite = unittest.TestLoader().loadTestsFromTestCase(ScrapyExtractorTest)
        # 'with' closes the report file even if the runner raises.
        with open(os.pardir + '/Logger/Test-Results.txt', 'a') as filePointer:
            runner = unittest.TextTestRunner(verbosity=2, stream=filePointer)
            result = runner.run(suite)
        return result

class Test(): #class Test(unittest.TestCase): """Testing for extraction of different kind of objects""" def setUp(self): logging.basicConfig(filename='test_log.txt', level=logging.DEBUG, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') self.urlUtil = UrlUtil() self.urlAddress = 'file://' + getcwd() + '/index.html' ##(i) Image extraction test def testImageExtract(self): """Testing for image extraction from given page""" #Initialize the extractor configFile = 'imageExtractionTest.yml' extractor = ScrapyExtractor(configFile, self.urlUtil) #Create an image item item = ImageArrayItem() item.init() #Create an HtmlResponse object for performing XPath operations on it bodyForResponse = self.urlUtil.getUrlResponse(self.urlAddress) response = HtmlResponse(self.urlAddress, body=bodyForResponse) #Extract images from the HtmlResponse object extractedData = extractor.extract(response, item) #Load the correct data and verify it with extracted trueData = json.load(open('correctImageData','r')) self.assertTrue(extractedData == trueData) #print extractedData ##(ii) Text extraction test def testTextExtract(self): """Testing for text extraction from given page""" #Initialize the extractor configFile = 'textExtractionTest.yml' extractor = ScrapyExtractor(configFile, self.urlUtil) #Create a text item item = TextArrayItem() #item.init() #Create an HtmlResponse object for performing XPath operations on it bodyForResponse = self.urlUtil.getUrlResponse(self.urlAddress) response = HtmlResponse(self.urlAddress, body=bodyForResponse) #Extract text from the HtmlResponse object extractedData = extractor.extract(response, item) #Load the correct data and verify it with extracted #print extractedData trueData = json.load(open('correctTextData','r')) self.assertTrue(extractedData == trueData) #(iii) Link Extraction Test def testLinkExtract(self): """Testing for recursive link extraction from given page and following it""" #Initialize the extractor configFile = 'linkExtractionTest.yml' 
extractor = ScrapyExtractor(configFile, self.urlUtil) #Create a link item item = LinkArrayItem() item.init() #Create an HtmlResponse object for performing XPath operations on it bodyForResponse = self.urlUtil.getUrlResponse(self.urlAddress) response = HtmlResponse(self.urlAddress, body=bodyForResponse) #Extract links from the HtmlResponse object extractedData = extractor.extract(response, item) #Load the correct data and verify it with extracted print extractedData