def test_getLocation(self):
    """Check that getLocation gives well-formed results for a batch of postings.

    Collects job keys with batchExtractMatches (one page, search term
    "software engineer"), fetches the first ``testLen`` postings with
    extractJobHTML and runs getLocation on each parsed page.

    Every non-None result must contain a match for ``[a-z]+`` in its first
    element and for ``[a-z]{2}`` in its second element, and exactly
    ``testLen`` results must have been gathered.

    Raises:
        KeyError: if the JOB_KEYS_REGEX environment variable is not set.
    """
    testLen = 15  # Must adjust numPages if this is set to 20 or higher
    keys = batchExtractMatches(
        numPages=1,
        searchTerm="software engineer",
        searchRegexEx=os.environ["JOB_KEYS_REGEX"],
        tor=False,
        port=9050,
        matchSingleWriteLocation=None,
        userAgent=None,
        sleepTime=None,
        htmlWriteLocations=None,
        matchWriteLocations=None,
        pageIncrements=10,
    )
    locs = [
        getLocation(
            extractJobHTML(jobKey=key, tor=False, port=None, prettify=False),
            replaceDict,
        )
        for key in keys[:testLen]
    ]
    for loc in locs:
        # None results are skipped rather than failed (presumably postings
        # without a parsable location -- confirm against getLocation).
        if loc is None:
            continue
        # assertRegex uses a search, so these only require a matching substring.
        self.assertRegex(loc[0], r"[a-z]+")
        self.assertRegex(loc[1], r"[a-z]{2}")
    # Fails when fewer than testLen keys were returned by the search.
    self.assertEqual(len(locs), testLen)
def test_extractJobHTML(self):
    """Verify that extractJobHTML returns a bs4.BeautifulSoup object.

    Uses the first key returned by batchExtractMatches for a one-page
    "data scientist" search as the posting to fetch.

    Raises:
        KeyError: if the JOB_KEYS_REGEX environment variable is not set.
        IndexError: if the search returns no keys.
    """
    searchArgs = {
        "numPages": 1,
        "searchTerm": "data scientist",
        "searchRegexEx": os.environ["JOB_KEYS_REGEX"],
        "tor": False,
        "port": 9050,
        "matchSingleWriteLocation": None,
        "userAgent": None,
        "sleepTime": None,
        "htmlWriteLocations": None,
        "matchWriteLocations": None,
        "pageIncrements": 10,
    }
    firstKey = batchExtractMatches(**searchArgs)[0]
    parsedPage = extractJobHTML(
        jobKey=firstKey, tor=False, port=None, prettify=False
    )
    self.assertIsInstance(parsedPage, bs4.BeautifulSoup)
def test_matchClass(self):
    """Verify that a match_class filter finds the job description element.

    Fetches the first posting from a one-page "software engineer" search,
    passes ``match_class(["jobsearch-JobComponent-description"])`` to
    ``find_all`` and checks that the stringified result contains the
    expected ``class="..."`` attribute text.

    Raises:
        KeyError: if the JOB_KEYS_REGEX environment variable is not set.
        IndexError: if the search returns no keys.
    """
    searchArgs = {
        "numPages": 1,
        "searchTerm": "software engineer",
        "searchRegexEx": os.environ["JOB_KEYS_REGEX"],
        "tor": False,
        "port": 9050,
        "matchSingleWriteLocation": None,
        "userAgent": None,
        "sleepTime": None,
        "htmlWriteLocations": None,
        "matchWriteLocations": None,
        "pageIncrements": 10,
    }
    firstKey = batchExtractMatches(**searchArgs)[0]
    jobSoup = extractJobHTML(
        jobKey=firstKey, tor=False, port=None, prettify=False
    )
    matches = jobSoup.find_all(
        match_class(["jobsearch-JobComponent-description"])
    )
    rendered = str(matches)
    self.assertIn("class=\"jobsearch-JobComponent-description", rendered)
def test_getTags(self):
    """Verify that getTags reports both tag groups for a fetched posting.

    Fetches the first posting from a one-page "software engineer" search
    and calls getTags with two tag groups ("commonWords" and "software");
    both group names must appear in the returned value.

    Raises:
        KeyError: if the JOB_KEYS_REGEX environment variable is not set.
        IndexError: if the search returns no keys.
    """
    searchArgs = {
        "numPages": 1,
        "searchTerm": "software engineer",
        "searchRegexEx": os.environ["JOB_KEYS_REGEX"],
        "tor": False,
        "port": 9050,
        "matchSingleWriteLocation": None,
        "userAgent": None,
        "sleepTime": None,
        "htmlWriteLocations": None,
        "matchWriteLocations": None,
        "pageIncrements": 10,
    }
    firstKey = batchExtractMatches(**searchArgs)[0]
    jobSoup = extractJobHTML(
        jobKey=firstKey, tor=False, port=None, prettify=False
    )
    tagGroups = {
        "commonWords": ["a", "the", "an", "by", "for", "but"],
        "software": ["software"],
    }
    found = getTags(soup=jobSoup, tags=tagGroups, replaceDict=replaceDict)
    # Checked in the same order as the group names are listed here.
    for groupName in ("commonWords", "software"):
        self.assertIn(groupName, found)