コード例 #1
0
    def test_getLocation(self):
        """Check the locations extracted from the first ``testLen`` job pages.

        Job keys are collected with ``batchExtractMatches`` using the regex
        stored in the ``JOB_KEYS_REGEX`` environment variable (a missing
        variable raises ``KeyError``). Each of the first ``testLen`` keys is
        fetched with ``extractJobHTML`` and passed to ``getLocation``.

        Every non-None location must have a first element containing at
        least one lowercase letter and a second element containing two
        consecutive lowercase letters, and exactly ``testLen`` results must
        have been collected.
        """
        testLen = 15  # Must adjust numPages if this is set to 20 or higher
        keys = batchExtractMatches(numPages=1,
                                   searchTerm="software engineer",
                                   searchRegexEx=os.environ["JOB_KEYS_REGEX"],
                                   tor=False,
                                   port=9050,
                                   matchSingleWriteLocation=None,
                                   userAgent=None,
                                   sleepTime=None,
                                   htmlWriteLocations=None,
                                   matchWriteLocations=None,
                                   pageIncrements=10)
        locs = []
        for key in keys[:testLen]:
            soup = extractJobHTML(jobKey=key,
                                  tor=False,
                                  port=None,
                                  prettify=False)
            locs.append(getLocation(soup, replaceDict))

        for loc in locs:
            # A None result is tolerated and skipped. Identity comparison is
            # the correct check for the None singleton.
            if loc is None:
                continue
            # NOTE(review): presumably loc is a (city, state) pair - confirm
            # against getLocation.
            self.assertRegex(loc[0], r"[a-z]+")
            self.assertRegex(loc[1], r"[a-z]{2}")

        # Slicing silently yields fewer items when the search returns fewer
        # than testLen keys; this assertion catches that case.
        self.assertEqual(len(locs), testLen)
コード例 #2
0
 def test_extractJobHTML(self):
     """Verify that extractJobHTML returns a ``bs4.BeautifulSoup`` object.

     Job keys are collected with ``batchExtractMatches`` for the search
     term "data scientist", using the regex stored in the
     ``JOB_KEYS_REGEX`` environment variable. Only the first key is
     fetched and type-checked.
     """
     search_options = {
         "numPages": 1,
         "searchTerm": "data scientist",
         "searchRegexEx": os.environ["JOB_KEYS_REGEX"],
         "tor": False,
         "port": 9050,
         "matchSingleWriteLocation": None,
         "userAgent": None,
         "sleepTime": None,
         "htmlWriteLocations": None,
         "matchWriteLocations": None,
         "pageIncrements": 10,
     }
     job_keys = batchExtractMatches(**search_options)

     # Only the first matched job is needed for the type check.
     first_key = job_keys[0]
     page = extractJobHTML(
         jobKey=first_key,
         tor=False,
         port=None,
         prettify=False,
     )
     self.assertIsInstance(page, bs4.BeautifulSoup)
コード例 #3
0
 def test_matchClass(self):
     """Verify that a match_class filter finds the job description markup.

     The first job key returned by ``batchExtractMatches`` is fetched with
     ``extractJobHTML``. The result of ``find_all`` with the filter built
     by ``match_class`` is converted to a string, which must contain the
     description element's class attribute text.
     """
     job_keys = batchExtractMatches(
         numPages=1,
         searchTerm="software engineer",
         searchRegexEx=os.environ["JOB_KEYS_REGEX"],
         tor=False,
         port=9050,
         matchSingleWriteLocation=None,
         userAgent=None,
         sleepTime=None,
         htmlWriteLocations=None,
         matchWriteLocations=None,
         pageIncrements=10,
     )
     page = extractJobHTML(
         jobKey=job_keys[0],
         tor=False,
         port=None,
         prettify=False,
     )

     class_filter = match_class(["jobsearch-JobComponent-description"])
     rendered = str(page.find_all(class_filter))
     self.assertIn("class=\"jobsearch-JobComponent-description", rendered)
コード例 #4
0
 def test_getTags(self):
     """Verify that getTags reports both tag groups for a job page.

     The first job key returned by ``batchExtractMatches`` is fetched with
     ``extractJobHTML`` and passed to ``getTags`` with two tag groups,
     "commonWords" and "software". Both group names must appear in the
     result.
     """
     job_keys = batchExtractMatches(
         numPages=1,
         searchTerm="software engineer",
         searchRegexEx=os.environ["JOB_KEYS_REGEX"],
         tor=False,
         port=9050,
         matchSingleWriteLocation=None,
         userAgent=None,
         sleepTime=None,
         htmlWriteLocations=None,
         matchWriteLocations=None,
         pageIncrements=10,
     )
     page = extractJobHTML(
         jobKey=job_keys[0],
         tor=False,
         port=None,
         prettify=False,
     )

     tag_groups = {
         "commonWords": ["a", "the", "an", "by", "for", "but"],
         "software": ["software"],
     }
     found = getTags(soup=page, tags=tag_groups, replaceDict=replaceDict)

     # Checked in the same order as the groups are declared above.
     for group_name in ("commonWords", "software"):
         self.assertIn(group_name, found)