def detect_jobcards(self):
    """Locate the job cards of the result list and wrap them in an iterator.

    The three wrapper elements (results pane, results container, results
    list) must each occur exactly once on the page.  When they do, every
    ``artdeco-list__item`` element is collected and ``self.jobcards_it`` is
    set to a ``FunctionIterator`` that applies ``self._click`` to each card.
    When one of the wrappers is missing or duplicated, a diagnostic is
    printed and ``self.jobcards_it`` is left untouched.

    Returns:
        ``self``, so that calls can be chained.
    """
    find = self.driver.find_elements_by_class_name

    # Integer results are compared with ``==``/``!=``: identity comparison
    # (``is 1``) only works by accident of CPython's small-int cache.
    if len(find('jobs-search-two-pane__results')) != 1:
        print("\n len(jobcards_container) is not 1.\n")
        return self
    if len(find('jobs-search-results')) != 1:
        print("\n len(jobcards_contents) is not 1.\n")
        return self
    if len(find('jobs-search-results__list')) != 1:
        print("\n len(jobcard_ul) is not 1.\n")
        return self

    jobcards = find('artdeco-list__item')
    self.jobcards_it = FunctionIterator(
        obj=jobcards,
        func=self._click,
        dbgon=True,
        caller=f"{self.__class__} | {inspect.stack()[0][3]}")
    print(
        f"{'='*60}\n{self.__class__} | {inspect.stack()[0][3]}\n Job-cards exist."
    )
    return self
def collect_on_keywords(self, search_keywords, location='Spain', duration=0):
    """Apply ``self.collect_on_keyword`` to every entry of ``search_keywords``.

    The work is driven by a ``FunctionIterator`` and bracketed by the
    ``report_init`` / ``report_fin`` calls of a ``dbg.Function`` built from
    the current frame.  ``location`` and ``duration`` are accepted but not
    used by this body.
    """
    report = dbg.Function(inspect.currentframe()).report_init()

    keyword_iter = FunctionIterator(search_keywords, self.collect_on_keyword)
    while True:
        if not keyword_iter.iterable:
            break
        keyword_iter.nextop()

    report.report_fin()
def text__report(self):
    """Drain a FunctionIterator and check that its reporting attributes exist."""
    # NOTE(review): the name starts with "text", not "test" -- presumably a
    # typo; default unittest discovery only collects "test*" methods, so
    # confirm whether this check is meant to run.
    iterator = FunctionIterator(
        obj=self.search_keywords,
        func=callable_func,
        exp_runtime=3600,
    )
    while iterator.iterable:
        iterator.nextop()

    expected_attrs = ('idx', 'len', 'start_dt', 'exp_runtime', 'callername')
    for attr_name in expected_attrs:
        self.assertTrue(hasattr(iterator, attr_name))
def test__handle_kwargs(self):
    """Extra keyword arguments are kept on the iterator and surface per step.

    The iterator is built with ``caller``, ``p1`` and ``p2``; the test
    expects ``dbgon`` to be truthy, three stored kwargs, and three items in
    the value returned by each ``nextop`` call.
    """
    options = {
        'obj': self.search_keywords,
        'func': callable_func,
        'caller': inspect.currentframe(),
        'p1': 'param1',
        'p2': 'param2',
    }
    iterator = FunctionIterator(**options)

    self.assertTrue(iterator.dbgon)
    self.assertEqual(len(iterator.kwargs), 3)

    while iterator.iterable:
        step_keys = iterator.nextop()
        self.assertEqual(len(step_keys), 3)
def test__init(self):
    """A freshly built iterator starts at index 0, has length 2, is iterable."""
    iterator = FunctionIterator(
        obj=self.search_keywords,
        func=callable_func,
        duration=10,
        sleepsecs=2,
    )

    self.assertEqual(iterator.idx, 0)
    self.assertEqual(iterator.len, 2)
    self.assertTrue(iterator.iterable)
def test__nextop(self):
    """Smoke test: stepping the iterator until exhaustion must not raise."""
    iterator = FunctionIterator(obj=self.search_keywords, func=callable_func)
    while True:
        if not iterator.iterable:
            break
        iterator.nextop()
def collect_on_keywords(search_keywords):
    """Run ``collect_on_keyword`` over ``search_keywords`` via a FunctionIterator."""
    keyword_iter = FunctionIterator(search_keywords, collect_on_keyword)
    while True:
        if not keyword_iter.iterable:
            break
        keyword_iter.nextop()
class Collector(models.LinkedInJobPosting):
    """Collect job postings from a job-search results page through a web driver.

    The collector clicks each job card in the left-hand result list, reads
    the card's title / company / location, expands the right-hand details
    panel and stores the page HTML through the parent model.

    ``save_job_details`` only stores a posting when ``search_keyword`` and
    ``search_location`` are set; nothing in this class sets them
    automatically, so callers are expected to run
    ``extract_keyword_location`` first.
    """

    def __init__(self, driver, nextpage_click_secs=3, jobcard_click_secs=2,
                 job_details_human_reading_secs=30):
        """Store the driver and the pacing delays.

        Args:
            driver: Web driver positioned on (or later moved to) the search page.
            nextpage_click_secs: Pause before moving on to the next result page.
            jobcard_click_secs: Pause before clicking the next job card.
            job_details_human_reading_secs: Pause spent on each job's details
                before saving them.
        """
        super().__init__()
        self.driver = driver
        # One timezone-aware timestamp for everything this instance collects.
        self.collect_dt = datetime.now().astimezone()
        self.job_details_human_reading_secs = job_details_human_reading_secs
        self.jobcard_click_secs = jobcard_click_secs
        self.nextpage_click_secs = nextpage_click_secs

    def is_readyto_collect(self):
        """Return True when the current URL's query mentions keywords, location and sortBy."""
        query = urlparse(self.driver.current_url).query
        return all(key in query for key in ('keywords', 'location', 'sortBy'))

    def extract_keyword_location(self):
        """Copy the ``keywords`` and ``location`` query parameters onto ``self``.

        Raises:
            KeyError: If either parameter is absent from the current URL.
        """
        params = parse_qs(urlparse(self.driver.current_url).query)
        self.search_keyword = params['keywords'][0]
        self.search_location = params['location'][0]

    def detect_jobcards(self):
        """Find the job cards of the result list and store them as ``self.jobcards``.

        Each of the three wrapper elements must occur exactly once; otherwise
        a diagnostic is printed and ``self.jobcards`` is left untouched.

        Returns:
            ``self``, so that calls can be chained.
        """
        find = self.driver.find_elements_by_class_name

        # ``==``/``!=`` instead of ``is``: identity comparison of ints only
        # works by accident of CPython's small-int cache.
        if len(find('jobs-search-two-pane__results')) != 1:
            print("\n len(jobcards_container) is not 1.\n")
            return self
        if len(find('jobs-search-results')) != 1:
            print("\n len(jobcards_contents) is not 1.\n")
            return self
        if len(find('jobs-search-results__list')) != 1:
            print("\n len(jobcard_ul) is not 1.\n")
            return self

        jobcards = find('artdeco-list__item')
        self.jobcards = FunctionIterator(jobcards, self._click)
        print(
            f"{'='*60}\n{self.__class__} | {inspect.stack()[0][3]}\n Job-cards exist."
        )
        return self

    def parse_jobcard(self):
        """Read title, company name and location from the currently active job card.

        Each field is assigned to ``self`` only when exactly one matching
        element exists; otherwise a diagnostic is printed and the attribute
        keeps whatever value it had before.
        """
        active_jobcard = self.driver.find_elements_by_class_name(
            'job-card-search--is-active')
        if len(active_jobcard) != 1:
            print(
                f"{'#'*60}\n{self.__class__} | {inspect.stack()[0][3]}\n 이런 경우는 절대 발생할 수 없다.\n"
            )
            return

        jobcard = active_jobcard[0].find_elements_by_class_name(
            'job-card-search__content-wrapper')
        if len(jobcard) != 1:
            print(
                f"{'#'*60}\n{self.__class__} | {inspect.stack()[0][3]}\n len(jobcard) is not 1.\n"
            )
            return

        # (attribute to set, CSS class to look up, label used in the diagnostic)
        fields = (
            ('title', 'job-card-search__title', 'job_title'),
            ('companyname', 'job-card-search__company-name', 'companyname'),
            ('location', 'job-card-search__location', 'location'),
        )
        for attr, css_class, label in fields:
            found = jobcard[0].find_elements_by_class_name(css_class)
            if len(found) == 1:
                setattr(self, attr, found[0].text)
            else:
                print(
                    f"{'#'*60}\n{self.__class__} | {inspect.stack()[0][3]}\n len({label}) is not 1.\n"
                )

    def _click(self, webelem):
        """Click ``webelem``; any failure is deliberately ignored (best effort)."""
        try:
            webelem.click()
        except Exception:
            # A card that cannot be clicked is skipped rather than aborting the run.
            pass

    def detect_job_details(self):
        """Return True when the details pane and its main content each exist exactly once."""
        rightpanel_container = self.driver.find_elements_by_class_name(
            'jobs-search-two-pane__details')
        if len(rightpanel_container) != 1:
            print(
                f"{'#'*60}\n{self.__class__} | {inspect.stack()[0][3]}\n len(rightpanel_container) is not 1.\n"
            )
            return False

        content_container = self.driver.find_elements_by_class_name(
            'jobs-details__main-content')
        if len(content_container) != 1:
            print(
                f"{'#'*60}\n{self.__class__} | {inspect.stack()[0][3]}\n len(content_container) is not 1.\n"
            )
            return False

        print(
            f"{'='*60}\n{self.__class__} | {inspect.stack()[0][3]}\n Right-panel exists.\n"
        )
        return True

    def debug_re_search(self, sleepsecs=3):
        """Print whether three section markers occur in the page source.

        Args:
            sleepsecs: Seconds to wait before reading the page source.
        """
        time.sleep(sleepsecs)
        print(f"{'*'*60}\n{self.__class__} | {inspect.stack()[0][3]}")
        html = self.driver.page_source
        # Raw strings keep ``\s`` a regex escape instead of an invalid string
        # escape; the printed text is unchanged.
        patterns = (
            r'jobs-premium-applicant-insights\s*',
            r'jobs-premium-company-insights\s*',
            r'jobs-company__card\s*',
        )
        for pattern in patterns:
            m = re.search(pattern, string=html)
            print(f"{'-'*60}\n {pattern} re.search :\n {m}")

    def click_job_description_see_more(self, sleepsecs=5):
        """Click the "See more" button of the job description, if present.

        Missing elements and click failures are reported and ignored.

        Args:
            sleepsecs: Seconds to wait before looking for the button.
        """
        time.sleep(sleepsecs)
        job_description = self.driver.find_elements_by_class_name(
            'jobs-description')
        # An absent description used to raise IndexError outside the
        # best-effort ``try``; it is now treated like a missing button.
        if job_description:
            see_more_btn = job_description[0].find_elements_by_class_name(
                'artdeco-button')
        else:
            see_more_btn = []
        try:
            see_more_btn[0].click()
        except Exception:
            print(
                f"{'#'*60}\n{self.__class__} | {inspect.stack()[0][3]}\n 무시하라."
            )

    def click_applicant_insights_send_feedback(self, sleepsecs=2):
        """Move the pointer to the "Send feedback" row of the applicant insights.

        Args:
            sleepsecs: Seconds to wait before looking for the section.
        """
        time.sleep(sleepsecs)
        applicant_insights = self.driver.find_elements_by_class_name(
            'jobs-premium-applicant-insights')
        if len(applicant_insights) != 1:
            print(
                f"{'#'*60}\n{self.__class__} | {inspect.stack()[0][3]}\n len(applicant_insights) is not 1."
            )
            return

        send_feedback = applicant_insights[0].find_elements_by_class_name(
            'display-flex')
        if len(send_feedback) != 1:
            print(
                f"{'#'*60}\n{self.__class__} | {inspect.stack()[0][3]}\n len(send_feedback) is not 1."
            )
            return

        # ``self.driver``: the bare name ``driver`` is not defined in this scope.
        ActionChains(self.driver).move_to_element(send_feedback[0]).perform()

    def click_company_insights_more_company(self, sleepsecs=2):
        """Move the pointer to the "See more company insights" link, if present.

        Args:
            sleepsecs: Seconds to wait before looking for the section.
        """
        time.sleep(sleepsecs)
        company_insights = self.driver.find_elements_by_class_name(
            'jobs-premium-company-insights')
        if len(company_insights) != 1:
            print(
                f"{'#'*60}\n{self.__class__} | {inspect.stack()[0][3]}\n len(company_insights) is not 1."
            )
            return

        try:
            # NOTE(review): an XPath starting with "//" is evaluated against
            # the whole document, not this element -- confirm whether ".//"
            # was intended.
            more_company = company_insights[0].find_element_by_xpath(
                "//a[contains(@data-control-name, 'see_more_company_link')]"
            )
        except Exception:
            print(
                f"{'#'*60}\n{self.__class__} | {inspect.stack()[0][3]}\n 무시하라."
            )
        else:
            # ``self.driver``: the bare name ``driver`` is not defined in this scope.
            ActionChains(self.driver).move_to_element(more_company).perform()

    def save_job_details(self):
        """Store the current page HTML through ``insert_doc`` when all key fields exist.

        Nothing is stored unless ``search_keyword``, ``search_location``,
        ``companyname`` and ``title`` are all set on ``self``.

        Returns:
            ``self``, so that calls can be chained.
        """
        required = ('search_keyword', 'search_location', 'companyname', 'title')
        if all(hasattr(self, name) for name in required):
            self.html = self.driver.page_source
            print(
                f"{'='*60}\n{self.__class__} | {inspect.stack()[0][3]}\n list(self.schematize().doc) : {list(self.schematize().doc)}."
            )
            self.insert_doc()
        else:
            print(
                f"{'#'*60}\n{self.__class__} | {inspect.stack()[0][3]}\n 중요한 키값을 가지고 있지 않기 때문에 저장하지 않는다.\n"
            )
        return self

    # Marked for deletion by the original author.
    def detect_pagination(self):
        """Store the numbered pagination buttons; return False when there are none."""
        pagination_indicator = self.driver.find_elements_by_class_name(
            'artdeco-pagination__indicator--number')
        if not pagination_indicator:
            print(
                f"{'#'*60}\n{self.__class__} | {inspect.stack()[0][3]}\n len(pagination_indicator) is 0.\n"
            )
            return False

        self.pagination_indicators = list(pagination_indicator)
        return True

    def selected_page(self, addi_info=None):
        """Record the text of the selected pagination button as ``self._selected_page``.

        Args:
            addi_info: Free-form label appended to the printed progress line.

        Returns:
            ``self``, so that calls can be chained.
        """
        pagination_pages = self.driver.find_elements_by_class_name(
            'artdeco-pagination__pages--number')
        if len(pagination_pages) != 1:
            print(
                f"{'#'*60}\n{self.__class__} | {inspect.stack()[0][3]}\n len(pagination_pages) is not 1.\n"
            )
            return self

        selected = pagination_pages[0].find_elements_by_class_name('selected')
        if len(selected) != 1:
            print(
                f"{'#'*60}\n{self.__class__} | {inspect.stack()[0][3]}\n len(selected) is not 1.\n"
            )
            return self

        self._selected_page = selected[0].text
        print(
            f"{'='*60}\n{self.__class__} | {inspect.stack()[0][3]} : {self._selected_page} ({addi_info})"
        )
        return self

    # Marked for deletion by the original author.
    def paginate(self):
        """Click the pagination button that follows the selected one.

        Returns:
            True when a following button was clicked, False when pagination
            is absent or the selected button is the last one.
        """
        if not self.detect_pagination():
            return False

        self.selected_page("current_page_num")
        if not hasattr(self, 'pagination_indicators'):
            print(
                f"{'#'*60}\n{self.__class__} | {inspect.stack()[0][3]}\n hasattr(self, 'pagination_indicators') is False.\n"
            )
            return False

        passed_current = False
        for indicator in self.pagination_indicators:
            if passed_current:
                # First button after the selected one: this is the next page.
                indicator.click()
                self.selected_page("moved_page_num")
                return True
            if self._selected_page == indicator.find_element_by_tag_name(
                    'span').text:
                passed_current = True
        return False

    # Marked for deletion by the original author.
    def paginate_v1(self, cur_pagenum, last_pagenum, sleepsecs):
        """Legacy page walker: collect one page, then click the next page number.

        NOTE(review): this method references ``collect_1page`` and ``fr``,
        neither of which is defined in this block; it is expected to fail
        with NameError unless they exist at module level -- confirm before
        use.
        """
        while cur_pagenum <= last_pagenum:
            time.sleep(sleepsecs)
            pagination = self.driver.find_element_by_tag_name(
                'artdeco-pagination')
            cur_pbutton = pagination.find_element_by_xpath(
                '//li[contains(@class, "active selected")]')
            cur_pagenum = int(
                cur_pbutton.find_element_by_tag_name('span').text)
            print(
                f"{'-'*60}\n page-number progress : {cur_pagenum}/{last_pagenum}"
            )

            collect_1page()

            # Find the button of the next page number and click it.
            page_buttons = pagination.find_elements_by_tag_name('li')
            cur_i = None
            for i, pbutton in enumerate(page_buttons, start=1):
                if pbutton.find_element_by_tag_name('span').text == str(
                        cur_pagenum):
                    cur_i = i
                # Equality, not identity: ``is`` on computed ints is unreliable.
                if (cur_i is not None) and (i == cur_i + 1):
                    print(" Go to next page.")
                    pbutton.click()
                    break
            if cur_pagenum == last_pagenum:
                print(" This is the last page. Stop.")
                cur_pagenum += 1
                break
            fr.report_mid(addi_info=f" cur_pagenum : {cur_pagenum} 완료.")
        fr.report_fin()

    def collect_job_details(self):
        """Expand, inspect and save the details panel of the active job card."""
        if not self.detect_job_details():
            return
        self.click_job_description_see_more(5)
        self.click_applicant_insights_send_feedback(2)
        self.click_company_insights_more_company(2)
        self.debug_re_search(3)
        # Pretend to read the job details for a while before saving.
        time.sleep(self.job_details_human_reading_secs)
        self.save_job_details()

    def loop_jobcards(self):
        """Click every job card on the current page and collect its details."""
        self.detect_jobcards()
        jobcards = getattr(self, 'jobcards', None)
        if jobcards is None:
            # Detection never succeeded (it already printed why), so there is
            # nothing to iterate.
            return
        while jobcards.iterable:
            # Wait before clicking the next job card.
            time.sleep(self.jobcard_click_secs)
            jobcards.nextop()
            self.parse_jobcard()
            self.collect_job_details()

    # Marked for deletion by the original author.
    def loop_pagination_v1(self, sleepsecs=3):
        """Legacy loop: process a page, wait, move to the next page until none is left."""
        pg_result = True
        while pg_result:
            self.loop_jobcards()
            # Wait ``sleepsecs`` before pressing the next-page button.
            time.sleep(sleepsecs)
            pg_result = self.paginate()

    def loop_pagination(self):
        """Run ``loop_jobcards`` once per step of a ``PaginationIterator``."""
        pi = PaginationIterator(func=self.loop_jobcards)
        while pi.iterable:
            pi.nextop()
            # Wait before pressing the next-page button.
            time.sleep(self.nextpage_click_secs)
class JobCards:
    """Job-card handling for the left-hand result list of a job-search page.

    The methods read ``self.driver`` and ``self.jobcard_click_secs`` and call
    ``self.collect_job_details``; none of these is defined here, so the class
    is presumably meant to be combined with one that provides them.
    """

    def __init__(self):
        super().__init__()

    def _click(self, webelem, **kwargs):
        """Click ``webelem``; any failure is deliberately ignored (best effort).

        Extra keyword arguments are accepted and ignored.
        """
        try:
            webelem.click()
        except Exception:
            # A card that cannot be clicked is skipped rather than aborting the run.
            pass

    def detect_jobcards(self):
        """Find the job cards of the result list and store them as ``self.jobcards_it``.

        Each of the three wrapper elements must occur exactly once; otherwise
        a diagnostic is printed and ``self.jobcards_it`` is left untouched.

        Returns:
            ``self``, so that calls can be chained.
        """
        find = self.driver.find_elements_by_class_name

        # ``==``/``!=`` instead of ``is``: identity comparison of ints only
        # works by accident of CPython's small-int cache.
        if len(find('jobs-search-two-pane__results')) != 1:
            print("\n len(jobcards_container) is not 1.\n")
            return self
        if len(find('jobs-search-results')) != 1:
            print("\n len(jobcards_contents) is not 1.\n")
            return self
        if len(find('jobs-search-results__list')) != 1:
            print("\n len(jobcard_ul) is not 1.\n")
            return self

        jobcards = find('artdeco-list__item')
        self.jobcards_it = FunctionIterator(
            obj=jobcards,
            func=self._click,
            dbgon=True,
            caller=f"{self.__class__} | {inspect.stack()[0][3]}")
        print(
            f"{'='*60}\n{self.__class__} | {inspect.stack()[0][3]}\n Job-cards exist."
        )
        return self

    def parse_jobcard(self):
        """Read title, company name and location from the currently active job card.

        Each field is assigned to ``self`` only when exactly one matching
        element exists; otherwise a diagnostic is printed and the attribute
        keeps whatever value it had before.
        """
        active_jobcard = self.driver.find_elements_by_class_name(
            'job-card-search--is-active')
        if len(active_jobcard) != 1:
            print(
                f"{'#'*60}\n{self.__class__} | {inspect.stack()[0][3]}\n 이런 경우는 절대 발생할 수 없다.\n"
            )
            return

        jobcard = active_jobcard[0].find_elements_by_class_name(
            'job-card-search__content-wrapper')
        if len(jobcard) != 1:
            print(
                f"{'#'*60}\n{self.__class__} | {inspect.stack()[0][3]}\n len(jobcard) is not 1.\n"
            )
            return

        # (attribute to set, CSS class to look up, label used in the diagnostic)
        fields = (
            ('title', 'job-card-search__title', 'job_title'),
            ('companyname', 'job-card-search__company-name', 'companyname'),
            ('location', 'job-card-search__location', 'location'),
        )
        for attr, css_class, label in fields:
            found = jobcard[0].find_elements_by_class_name(css_class)
            if len(found) == 1:
                setattr(self, attr, found[0].text)
            else:
                print(
                    f"{'#'*60}\n{self.__class__} | {inspect.stack()[0][3]}\n len({label}) is not 1.\n"
                )

    def loop_jobcards(self):
        """Click every job card on the current page and collect its details."""
        self.detect_jobcards()
        jobcards_it = getattr(self, 'jobcards_it', None)
        if jobcards_it is None:
            # Detection never succeeded (it already printed why), so there is
            # nothing to iterate.
            return
        while jobcards_it.iterable:
            # Wait before clicking the next job card.
            time.sleep(self.jobcard_click_secs)
            jobcards_it.nextop()
            self.parse_jobcard()
            self.collect_job_details()