Beispiel #1
0
    def setUp(self):
        """Build the fixtures used by the tests.

        Creates the CategoryValidator for the search URL, a scraper, a
        scorer and a CategorySetterExe whose input/output file paths are
        derived from the test data directory and the transaction id.
        """
        self._url = 'https://www.google.co.jp/search?q=ruby'
        self._testdatadir = "./tests/data/"
        self._transaction_id = "transaction_test"
        self._validator = CategoryValidator(self._url, self._transaction_id)
        self._scrapyer = GoogSearchScrapyer()
        self._scoreler = StdScoreler()

        self._relatedurls = ["https://facebook.com", "https://instagram.com"]

        # CategorySetterExe fixture.
        # NOTE(review): "ls" is passed as the executable path -- presumably a
        # stand-in for the real categoriser binary; confirm.
        exepath = "ls"
        self._infile = self._testdatadir + self._transaction_id + "1.scraped"
        self._outfile = (self._testdatadir + self._transaction_id
                         + "1.categorized")
        self._categorysetter = CategorySetterExe(exepath, self._infile,
                                                 self._outfile)
        # The former local ``rm_quoat`` lambda was never used and has been
        # removed.
    def setUp(self):
        """Build the fixtures used by the tests.

        Creates the CategoryValidator for the search URL, a scraper, a
        scorer and a CategorySetterExe whose input/output file paths are
        derived from the test data directory and the transaction id.
        """
        # Indentation is spaces only: the first statement used to be indented
        # with a tab, which Python 3 rejects (TabError) when mixed with spaces.
        self._url = 'https://www.google.co.jp/search?q=ruby'
        self._testdatadir = "./tests/data/"
        self._transaction_id = "transaction_test"
        self._validator = CategoryValidator(self._url, self._transaction_id)
        self._scrapyer = GoogSearchScrapyer()
        self._scoreler = StdScoreler()

        self._relatedurls = ["https://facebook.com", "https://instagram.com"]

        # CategorySetterExe fixture.
        # NOTE(review): "ls" is passed as the executable path -- presumably a
        # stand-in for the real categoriser binary; confirm.
        exepath = "ls"
        self._infile = self._testdatadir + self._transaction_id + "1.scraped"
        self._outfile = (self._testdatadir + self._transaction_id
                         + "1.categorized")
        self._categorysetter = CategorySetterExe(exepath, self._infile,
                                                 self._outfile)
        # The former local ``rm_quoat`` lambda was never used and has been
        # removed.
Beispiel #3
0
class TestCategoryValidator(unittest.TestCase):
    """Tests for CategoryValidator.do() and for CategorySetterExe.

    Each test starts from the real collaborators created in ``setUp`` and
    replaces individual methods with ``Mock`` objects as needed.
    """

    def setUp(self):
        """Create the validator, scraper, scorer and category setter."""
        self._url = 'https://www.google.co.jp/search?q=ruby'
        self._testdatadir = "./tests/data/"
        self._transaction_id = "transaction_test"
        self._validator = CategoryValidator(self._url, self._transaction_id)
        self._scrapyer = GoogSearchScrapyer()
        self._scoreler = StdScoreler()

        self._relatedurls = ["https://facebook.com", "https://instagram.com"]

        # CategorySetterExe fixture.
        # NOTE(review): "ls" is passed as the executable path -- presumably a
        # stand-in for the real categoriser binary; confirm.
        exepath = "ls"
        self._infile = self._testdatadir + self._transaction_id + "1.scraped"
        self._outfile = (self._testdatadir + self._transaction_id
                         + "1.categorized")
        self._categorysetter = CategorySetterExe(exepath, self._infile,
                                                 self._outfile)

    def test_categorysetterexe(self):
        """Run the un-mocked CategorySetterExe and expect four items."""
        # Feed the list of related URLs to the setter.
        self._categorysetter.setData(self._relatedurls)
        self._categorysetter.do()
        categorized_urls = self._categorysetter.items()
        self.assertEqual(len(categorized_urls), 4)

    # ---- success cases ----

    def test_success_all_mock(self):
        """Validate with scraper, scorer and category setter all mocked."""
        # Scraper mock.
        self._scrapyer.target(self._url)
        self._scrapyer.do = Mock()
        self._scrapyer.do.return_value = 0
        self._scrapyer.getRelatedUrl = Mock()
        self._scrapyer.getRelatedUrl.return_value = self._relatedurls

        # Scorer mock.
        self._scoreler.analyze = Mock()
        self._scoreler.analyze.return_value = "searchengine"

        # Category setter mock.  The return value is configured on the mocked
        # ``do`` itself; it was previously assigned to an unrelated
        # ``return_value`` attribute on the real object and had no effect.
        self._categorysetter.setData(self._relatedurls)
        self._categorysetter.do = Mock()
        self._categorysetter.do.return_value = 0
        self._categorysetter._getCategorizedUrls()

        category = self._validator.do(self._scrapyer, self._scoreler,
                                      self._categorysetter)
        self.assertEqual(category, "searchengine")

        # Also write the validation result out through Url2JsonWriter.
        writer = Url2JsonWriter()
        writer.output(self._transaction_id + ".json", self._validator)

    def test_success_scoreler_categorysetter_mock(self):
        """Validate with only the scorer and category setter mocked."""
        # The scraper is left un-mocked (per the original author's note it
        # performs real internet access).
        self._scrapyer.target(self._url)

        # Scorer mock.
        self._scoreler.analyze = Mock()
        self._scoreler.analyze.return_value = "searchengine"

        # Category setter mock (return value set on the mocked ``do``).
        self._categorysetter.setData(self._relatedurls)
        self._categorysetter.do = Mock()
        self._categorysetter.do.return_value = 0
        self._categorysetter._getCategorizedUrls()

        category = self._validator.do(self._scrapyer, self._scoreler,
                                      self._categorysetter)
        # Single-argument print(...) behaves the same as the Python 2 print
        # statement and also parses on Python 3.
        print(self._validator.getDetail())
        self.assertEqual(category, "searchengine")

    def test_success_categorysetter_mock(self):
        """Validate with only the category setter mocked; expect "lang"."""
        # The scraper is left un-mocked (per the original author's note it
        # performs real internet access); the scorer is real as well.
        self._scrapyer.target(self._url)

        # Category setter mock (return value set on the mocked ``do``).
        self._categorysetter.setData(self._relatedurls)
        self._categorysetter.do = Mock()
        self._categorysetter.do.return_value = 0
        self._categorysetter._getCategorizedUrls()

        category = self._validator.do(self._scrapyer, self._scoreler,
                                      self._categorysetter)

        print(self._validator.getDetail())
        self.assertEqual(category, "lang")

    # ---- error cases ----

    def test_error_categoryvalidator(self):
        """An exception raised by the scraper's do() must reach the caller."""
        # Scraper mock whose do() raises.
        self._scrapyer.target(self._url)
        self._scrapyer.do = Mock()
        self._scrapyer.do.side_effect = Exception
        self._scrapyer.getRelatedUrl = Mock()
        self._scrapyer.getRelatedUrl.return_value = self._relatedurls

        # Scorer mock.
        self._scoreler.analyze = Mock()
        self._scoreler.analyze.return_value = "searchengine"

        # Category setter mock (return value set on the mocked ``do``).
        self._categorysetter.do = Mock()
        self._categorysetter.do.return_value = 0
        self._categorysetter._getCategorizedUrls()

        self._categorysetter.setData(self._relatedurls)
        with self.assertRaises(Exception) as cnmgr:
            self._validator.do(self._scrapyer, self._scoreler,
                               self._categorysetter)
        print(cnmgr.exception)

    def test_error_categoryvalidator2(self):
        """An exception raised by the scorer's analyze() must reach the caller."""
        # Scraper mock.
        self._scrapyer.target(self._url)
        self._scrapyer.do = Mock()
        self._scrapyer.do.return_value = 0
        self._scrapyer.getRelatedUrl = Mock()
        self._scrapyer.getRelatedUrl.return_value = self._relatedurls

        # Scorer mock whose analyze() raises.
        self._scoreler.analyze = Mock()
        self._scoreler.analyze.side_effect = Exception

        # Category setter mock (return value set on the mocked ``do``).
        self._categorysetter.do = Mock()
        self._categorysetter.do.return_value = 0
        self._categorysetter._getCategorizedUrls()

        self._categorysetter.setData(self._relatedurls)
        with self.assertRaises(Exception) as cnmgr:
            self._validator.do(self._scrapyer, self._scoreler,
                               self._categorysetter)
        print(cnmgr.exception)
class TestCategoryValidator(unittest.TestCase):
    """Tests for CategoryValidator.do() and for CategorySetterExe.

    Each test starts from the real collaborators created in ``setUp`` and
    replaces individual methods with ``Mock`` objects as needed.
    """

    def setUp(self):
        """Create the validator, scraper, scorer and category setter."""
        # Indentation is spaces only: the first statement used to be indented
        # with a tab, which Python 3 rejects (TabError) when mixed with spaces.
        self._url = 'https://www.google.co.jp/search?q=ruby'
        self._testdatadir = "./tests/data/"
        self._transaction_id = "transaction_test"
        self._validator = CategoryValidator(self._url, self._transaction_id)
        self._scrapyer = GoogSearchScrapyer()
        self._scoreler = StdScoreler()

        self._relatedurls = ["https://facebook.com", "https://instagram.com"]

        # CategorySetterExe fixture.
        # NOTE(review): "ls" is passed as the executable path -- presumably a
        # stand-in for the real categoriser binary; confirm.
        exepath = "ls"
        self._infile = self._testdatadir + self._transaction_id + "1.scraped"
        self._outfile = (self._testdatadir + self._transaction_id
                         + "1.categorized")
        self._categorysetter = CategorySetterExe(exepath, self._infile,
                                                 self._outfile)

    def test_categorysetterexe(self):
        """Run the un-mocked CategorySetterExe and expect four items."""
        # Feed the list of related URLs to the setter.
        self._categorysetter.setData(self._relatedurls)
        self._categorysetter.do()
        categorized_urls = self._categorysetter.items()
        self.assertEqual(len(categorized_urls), 4)

    # ---- success cases ----

    def test_success_all_mock(self):
        """Validate with scraper, scorer and category setter all mocked."""
        # Scraper mock.
        self._scrapyer.target(self._url)
        self._scrapyer.do = Mock()
        self._scrapyer.do.return_value = 0
        self._scrapyer.getRelatedUrl = Mock()
        self._scrapyer.getRelatedUrl.return_value = self._relatedurls

        # Scorer mock.
        self._scoreler.analyze = Mock()
        self._scoreler.analyze.return_value = "searchengine"

        # Category setter mock.  The return value is configured on the mocked
        # ``do`` itself; it was previously assigned to an unrelated
        # ``return_value`` attribute on the real object and had no effect.
        self._categorysetter.setData(self._relatedurls)
        self._categorysetter.do = Mock()
        self._categorysetter.do.return_value = 0
        self._categorysetter._getCategorizedUrls()

        category = self._validator.do(self._scrapyer, self._scoreler,
                                      self._categorysetter)
        self.assertEqual(category, "searchengine")

        # Also write the validation result out through Url2JsonWriter.
        writer = Url2JsonWriter()
        writer.output(self._transaction_id + ".json", self._validator)

    def test_success_scoreler_categorysetter_mock(self):
        """Validate with only the scorer and category setter mocked."""
        # The scraper is left un-mocked (per the original author's note it
        # performs real internet access).
        self._scrapyer.target(self._url)

        # Scorer mock.
        self._scoreler.analyze = Mock()
        self._scoreler.analyze.return_value = "searchengine"

        # Category setter mock (return value set on the mocked ``do``).
        self._categorysetter.setData(self._relatedurls)
        self._categorysetter.do = Mock()
        self._categorysetter.do.return_value = 0
        self._categorysetter._getCategorizedUrls()

        category = self._validator.do(self._scrapyer, self._scoreler,
                                      self._categorysetter)
        # Single-argument print(...) behaves the same as the Python 2 print
        # statement and also parses on Python 3.
        print(self._validator.getDetail())
        self.assertEqual(category, "searchengine")

    def test_success_categorysetter_mock(self):
        """Validate with only the category setter mocked; expect "lang"."""
        # The scraper is left un-mocked (per the original author's note it
        # performs real internet access); the scorer is real as well.
        self._scrapyer.target(self._url)

        # Category setter mock (return value set on the mocked ``do``).
        self._categorysetter.setData(self._relatedurls)
        self._categorysetter.do = Mock()
        self._categorysetter.do.return_value = 0
        self._categorysetter._getCategorizedUrls()

        category = self._validator.do(self._scrapyer, self._scoreler,
                                      self._categorysetter)

        print(self._validator.getDetail())
        self.assertEqual(category, "lang")

    # ---- error cases ----

    def test_error_categoryvalidator(self):
        """An exception raised by the scraper's do() must reach the caller."""
        # Scraper mock whose do() raises.
        self._scrapyer.target(self._url)
        self._scrapyer.do = Mock()
        self._scrapyer.do.side_effect = Exception
        self._scrapyer.getRelatedUrl = Mock()
        self._scrapyer.getRelatedUrl.return_value = self._relatedurls

        # Scorer mock.
        self._scoreler.analyze = Mock()
        self._scoreler.analyze.return_value = "searchengine"

        # Category setter mock (return value set on the mocked ``do``).
        self._categorysetter.do = Mock()
        self._categorysetter.do.return_value = 0
        self._categorysetter._getCategorizedUrls()

        self._categorysetter.setData(self._relatedurls)
        with self.assertRaises(Exception) as cnmgr:
            self._validator.do(self._scrapyer, self._scoreler,
                               self._categorysetter)
        print(cnmgr.exception)

    def test_error_categoryvalidator2(self):
        """An exception raised by the scorer's analyze() must reach the caller."""
        # Scraper mock.
        self._scrapyer.target(self._url)
        self._scrapyer.do = Mock()
        self._scrapyer.do.return_value = 0
        self._scrapyer.getRelatedUrl = Mock()
        self._scrapyer.getRelatedUrl.return_value = self._relatedurls

        # Scorer mock whose analyze() raises.
        self._scoreler.analyze = Mock()
        self._scoreler.analyze.side_effect = Exception

        # Category setter mock (return value set on the mocked ``do``).
        self._categorysetter.do = Mock()
        self._categorysetter.do.return_value = 0
        self._categorysetter._getCategorizedUrls()

        self._categorysetter.setData(self._relatedurls)
        with self.assertRaises(Exception) as cnmgr:
            self._validator.do(self._scrapyer, self._scoreler,
                               self._categorysetter)
        print(cnmgr.exception)
Beispiel #5
0
    i=0
    with open(self._no_cat_url_list) as fo:
        with JSONStreamWriter.ArrayWriter(result_json) as jstream:
            for url in fo:
                if url.strip()== "" : continue
                    
                    transaction_id = str(time.time()).replace(".","_")
                    try:
                        #set categorysetter
                        infile  = tmpdir + "/" + self._transaction_id + str(i+1) + ".scraped"
                        outfile = tmpdir + "/" + self._transaction_id + str(i+1) + ".categorized"
                        categorysetter = CategorySetterExe(exepath,infile,outfile)
                    
                        #validate URL
                        wkValidator  = CategoryValidator(url,tmpdir + "/" + transaction_id + str(i))
                        category = wkValidator.do(scraper,scoreler,categorysetter)

                        #output result to a jsonformatted file.
                        writer.output(result_json,wkValidator)
                        #evt = fmter.Urls2Json(url,category,wkValidator.getDetail(),transaction_id)
                        #jstream.write(evt)

                        #delete tmpfile .straped and .categorized
                        #under construction
                        os.remove(infile)
                        os.remove(outfile)

                        i=i+1
                    except Exception as e:
                        print e