예제 #1
0
 def detect_jobcards(self):
     """Locate the job cards on the current page and wrap them in an iterator.

     Three wrapper elements are looked up by class name, in order, through
     ``self.driver``. Each must match exactly one element; at the first one
     that does not, a diagnostic is printed and the method returns without
     touching ``self.jobcards_it``. When all three match, every
     ``artdeco-list__item`` element is collected and stored on
     ``self.jobcards_it`` as a ``FunctionIterator`` that calls
     ``self._click``.

     Returns:
         ``self`` in every case, so calls can be chained.
     """
     # (label used in the diagnostic, CSS class that must match exactly once)
     wrappers = (
         ('jobcards_container', 'jobs-search-two-pane__results'),
         ('jobcards_contents', 'jobs-search-results'),
         ('jobcard_ul', 'jobs-search-results__list'),
     )
     for label, class_name in wrappers:
         matches = self.driver.find_elements_by_class_name(class_name)
         # Compare by value: ``is 1`` only worked through CPython's
         # small-int cache and raises a SyntaxWarning on Python 3.8+.
         if len(matches) != 1:
             print(f"\n len({label}) is not 1.\n")
             return self

     jobcards = self.driver.find_elements_by_class_name('artdeco-list__item')
     whoami = f"{self.__class__} | {inspect.stack()[0][3]}"
     self.jobcards_it = FunctionIterator(obj=jobcards,
                                         func=self._click,
                                         dbgon=True,
                                         caller=whoami)
     print(f"{'='*60}\n{whoami}\n Job-cards exist.")
     return self
예제 #2
0
 def collect_on_keywords(self,
                         search_keywords,
                         location='Spain',
                         duration=0):
     """Drive ``self.collect_on_keyword`` over *search_keywords*.

     A ``dbg.Function`` report is opened for this frame before the loop and
     closed after it. The loop itself is a ``FunctionIterator`` stepped with
     ``nextop()`` for as long as its ``iterable`` flag stays truthy.

     ``location`` and ``duration`` are accepted but not used in this body.
     """
     report = dbg.Function(inspect.currentframe()).report_init()
     keyword_iter = FunctionIterator(search_keywords, self.collect_on_keyword)
     while True:
         if not keyword_iter.iterable:
             break
         keyword_iter.nextop()
     report.report_fin()
예제 #3
0
 def text__report(self):
     """Exhaust a FunctionIterator, then check it exposes its report attributes.

     NOTE(review): the ``text__`` prefix looks like a typo of ``test__``; if
     so, a prefix-based test runner would not discover this method. Confirm
     before renaming.
     """
     runner = FunctionIterator(obj=self.search_keywords,
                               func=callable_func,
                               exp_runtime=3600)
     while True:
         if not runner.iterable:
             break
         runner.nextop()
     expected_attrs = ('idx', 'len', 'start_dt', 'exp_runtime', 'callername')
     for attr in expected_attrs:
         self.assertTrue(hasattr(runner, attr))
예제 #4
0
 def test__handle_kwargs(self):
     """Check how extra keyword arguments show up on the iterator.

     Builds a FunctionIterator with a ``caller`` frame and two extra
     keywords, asserts ``dbgon`` is truthy and ``kwargs`` has three entries,
     then asserts every ``nextop()`` result has length three.
     """
     extras = {'p1': 'param1', 'p2': 'param2'}
     runner = FunctionIterator(obj=self.search_keywords,
                               func=callable_func,
                               caller=inspect.currentframe(),
                               **extras)
     self.assertTrue(runner.dbgon)
     self.assertEqual(len(runner.kwargs), 3)
     while runner.iterable:
         result = runner.nextop()
         self.assertEqual(len(result), 3)
예제 #5
0
 def test__init(self):
     """Check the initial state of a freshly built FunctionIterator.

     Expects ``idx`` to be 0, ``len`` to be 2 and ``iterable`` to be truthy
     right after construction, before any ``nextop()`` call.
     """
     runner = FunctionIterator(obj=self.search_keywords,
                               func=callable_func,
                               duration=10,
                               sleepsecs=2)
     for attr, expected in (('idx', 0), ('len', 2)):
         self.assertEqual(getattr(runner, attr), expected)
     self.assertTrue(runner.iterable)
예제 #6
0
 def test__nextop(self):
     """Step a FunctionIterator with ``nextop()`` until ``iterable`` is falsy.

     No assertions are made; the test passes when the loop ends without
     raising.
     """
     runner = FunctionIterator(obj=self.search_keywords, func=callable_func)
     while True:
         if not runner.iterable:
             break
         runner.nextop()
예제 #7
0
 def collect_on_keywords(search_keywords):
     """Drive ``collect_on_keyword`` over *search_keywords*.

     Wraps the keywords in a ``FunctionIterator`` and calls ``nextop()``
     for as long as its ``iterable`` flag stays truthy.
     """
     keyword_iter = FunctionIterator(search_keywords, collect_on_keyword)
     while True:
         if not keyword_iter.iterable:
             break
         keyword_iter.nextop()
예제 #8
0
class Collector(models.LinkedInJobPosting):
    """Walk job-search result pages with a browser driver and store postings.

    The driver must expose the ``find_elements_by_*`` / ``find_element_by_*``
    lookups used below, plus ``current_url`` and ``page_source``.
    ``schematize()`` and ``insert_doc()`` are not defined in this class;
    presumably they come from ``models.LinkedInJobPosting`` (confirm there).
    """

    def __init__(self,
                 driver,
                 nextpage_click_secs=3,
                 jobcard_click_secs=2,
                 job_details_human_reading_secs=30):
        """Store the driver and the pacing delays (all in seconds).

        Args:
            driver: Browser driver used for every page lookup.
            nextpage_click_secs: Pause after each pagination step.
            jobcard_click_secs: Pause before each job-card click.
            job_details_human_reading_secs: Pause on a job-details panel
                before it is saved.
        """
        super().__init__()
        self.driver = driver
        # Timezone-aware timestamp taken once, at construction.
        self.collect_dt = datetime.now().astimezone()
        self.job_details_human_reading_secs = job_details_human_reading_secs
        self.jobcard_click_secs = jobcard_click_secs
        self.nextpage_click_secs = nextpage_click_secs

    def _log_header(self, fill):
        """Return the 60-char banner plus "<class> | <calling method>" line.

        Must be called directly from the method whose name should appear,
        because the name is read from the immediate caller's stack frame.
        """
        return f"{fill * 60}\n{self.__class__} | {inspect.stack(0)[1].function}"

    def is_readyto_collect(self):
        """Return True when the current URL's query mentions all search keys.

        This is a substring test on the raw query string for ``keywords``,
        ``location`` and ``sortBy``; it does not parse the parameters.
        """
        query = urlparse(self.driver.current_url).query
        return all(key in query for key in ('keywords', 'location', 'sortBy'))

    def extract_keyword_location(self):
        """Copy ``keywords`` and ``location`` from the current URL onto self.

        Raises:
            KeyError: If either parameter is absent from the parsed query.
        """
        params = parse_qs(urlparse(self.driver.current_url).query)
        self.search_keyword = params['keywords'][0]
        self.search_location = params['location'][0]

    def detect_jobcards(self):
        """Locate the job cards on the page and wrap them in an iterator.

        Three wrapper elements must each match exactly once. At the first
        that does not, a diagnostic is printed and ``self.jobcards`` is left
        untouched. Otherwise every ``artdeco-list__item`` element is stored
        on ``self.jobcards`` as a ``FunctionIterator`` calling ``_click``.

        Returns:
            ``self`` in every case.
        """
        # (label used in the diagnostic, CSS class that must match once)
        wrappers = (
            ('jobcards_container', 'jobs-search-two-pane__results'),
            ('jobcards_contents', 'jobs-search-results'),
            ('jobcard_ul', 'jobs-search-results__list'),
        )
        for label, class_name in wrappers:
            # Value comparison; ``is 1`` relied on CPython int caching.
            if len(self.driver.find_elements_by_class_name(class_name)) != 1:
                print(f"\n len({label}) is not 1.\n")
                return self

        jobcards = self.driver.find_elements_by_class_name(
            'artdeco-list__item')
        self.jobcards = FunctionIterator(jobcards, self._click)
        print(f"{self._log_header('=')}\n Job-cards exist.")
        return self

    def parse_jobcard(self):
        """Read title, company name and location from the active job card.

        Each field is set on ``self`` only when its element matches exactly
        once; otherwise a diagnostic is printed and the attribute keeps
        whatever value it had before.
        NOTE(review): that means a value from a previously parsed card can
        survive into the next save; confirm whether that is intended.
        """
        active_jobcard = self.driver.find_elements_by_class_name(
            'job-card-search--is-active')
        if len(active_jobcard) != 1:
            # Original message, roughly "this case can never happen".
            print(f"{self._log_header('#')}\n 이런 경우는 절대 발생할 수 없다.\n")
            return

        jobcard = active_jobcard[0].find_elements_by_class_name(
            'job-card-search__content-wrapper')
        if len(jobcard) != 1:
            print(f"{self._log_header('#')}\n len(jobcard) is not 1.\n")
            return

        # (attribute on self, label in the diagnostic, CSS class)
        fields = (
            ('title', 'job_title', 'job-card-search__title'),
            ('companyname', 'companyname', 'job-card-search__company-name'),
            ('location', 'location', 'job-card-search__location'),
        )
        for attr, label, class_name in fields:
            found = jobcard[0].find_elements_by_class_name(class_name)
            if len(found) == 1:
                setattr(self, attr, found[0].text)
            else:
                print(f"{self._log_header('#')}\n len({label}) is not 1.\n")

    def _click(self, webelem):
        """Click *webelem*, ignoring any error the click raises."""
        try:
            webelem.click()
        except Exception:
            # Deliberate best-effort: a failed click must not stop the loop.
            pass

    def detect_job_details(self):
        """Return True when the right-hand details panel and its content exist.

        Both elements must match exactly once; otherwise a diagnostic is
        printed and False is returned.
        """
        checks = (
            ('rightpanel_container', 'jobs-search-two-pane__details'),
            ('content_container', 'jobs-details__main-content'),
        )
        for label, class_name in checks:
            if len(self.driver.find_elements_by_class_name(class_name)) != 1:
                print(f"{self._log_header('#')}\n len({label}) is not 1.\n")
                return False
        print(f"{self._log_header('=')}\n Right-panel exists.\n")
        return True

    def debug_re_search(self, sleepsecs=3):
        """Sleep, then print whether three marker class names occur in the page.

        Args:
            sleepsecs: Seconds to wait before reading ``page_source``.
        """
        time.sleep(sleepsecs)
        print(self._log_header('*'))
        html = self.driver.page_source

        markers = (
            'jobs-premium-applicant-insights',
            'jobs-premium-company-insights',
            'jobs-company__card',
        )
        for marker in markers:
            # Raw string: ``\s`` is an invalid escape in a normal literal.
            m = re.search(marker + r'\s*', string=html)
            # ``\\s`` keeps the printed text identical to the old output.
            print(f"{'-'*60}\n {marker}\\s* re.search :\n {m}")

    def click_job_description_see_more(self, sleepsecs=5):
        """Sleep, then click the first button inside the job description.

        A missing description or a failed click is reported and ignored.

        Args:
            sleepsecs: Seconds to wait before looking up the description.
        """
        time.sleep(sleepsecs)
        job_description = self.driver.find_elements_by_class_name(
            'jobs-description')
        if not job_description:
            # Previously this case raised IndexError outside the try block.
            print(f"{self._log_header('#')}\n len(job_description) is 0.")
            return

        see_more_btn = job_description[0].find_elements_by_class_name(
            'artdeco-button')
        try:
            see_more_btn[0].click()
        except Exception:
            # Original message, roughly "ignore this".
            print(f"{self._log_header('#')}\n 무시하라.")

    def click_applicant_insights_send_feedback(self, sleepsecs=2):
        """Sleep, then move the pointer to the applicant-insights feedback row.

        Despite the name, the element is only moved to, not clicked.

        Args:
            sleepsecs: Seconds to wait before the lookup.
        """
        time.sleep(sleepsecs)
        applicant_insights = self.driver.find_elements_by_class_name(
            'jobs-premium-applicant-insights')
        if len(applicant_insights) != 1:
            print(
                f"{self._log_header('#')}\n len(applicant_insights) is not 1."
            )
            return

        send_feedback = applicant_insights[0].find_elements_by_class_name(
            'display-flex')
        if len(send_feedback) != 1:
            print(f"{self._log_header('#')}\n len(send_feedback) is not 1.")
            return

        # Was ``ActionChains(driver)``: a bare name not defined in this scope.
        ActionChains(self.driver).move_to_element(send_feedback[0]).perform()

    def click_company_insights_more_company(self, sleepsecs=2):
        """Sleep, then move the pointer to the "see more company" link.

        Despite the name, the link is only moved to, not clicked. A failed
        lookup is reported and ignored.

        Args:
            sleepsecs: Seconds to wait before the lookup.
        """
        time.sleep(sleepsecs)
        company_insights = self.driver.find_elements_by_class_name(
            'jobs-premium-company-insights')
        if len(company_insights) != 1:
            print(f"{self._log_header('#')}\n len(company_insights) is not 1.")
            return

        try:
            # NOTE(review): an XPath starting with ``//`` is normally
            # evaluated against the whole document, not this element;
            # confirm whether ``.//`` was intended.
            more_company = company_insights[0].find_element_by_xpath(
                "//a[contains(@data-control-name, 'see_more_company_link')]"
            )
        except Exception:
            # Original message, roughly "ignore this".
            print(f"{self._log_header('#')}\n 무시하라.")
        else:
            # Was ``ActionChains(driver)``: a bare name not defined here.
            ActionChains(self.driver).move_to_element(more_company).perform()

    def save_job_details(self):
        """Store the page source and insert the document when keys are present.

        Saving requires ``search_keyword``, ``search_location``,
        ``companyname`` and ``title`` to exist on ``self``; otherwise a
        diagnostic is printed and nothing is inserted.

        Returns:
            ``self`` in every case.
        """
        required = ('search_keyword', 'search_location', 'companyname',
                    'title')
        if all(hasattr(self, attr) for attr in required):
            self.html = self.driver.page_source
            print(
                f"{self._log_header('=')}\n list(self.schematize().doc) : {list(self.schematize().doc)}."
            )
            self.insert_doc()
        else:
            # Original message, roughly "not saved: important keys missing".
            print(
                f"{self._log_header('#')}\n 중요한 키값을 가지고 있지 않기 때문에 저장하지 않는다.\n"
            )
        return self

    # Marked for removal in the original source.
    def detect_pagination(self):
        """Collect the numbered pagination indicators, if any.

        Returns:
            True and sets ``self.pagination_indicators`` when at least one
            indicator exists; False (after a diagnostic) otherwise.
        """
        pagination_indicator = self.driver.find_elements_by_class_name(
            'artdeco-pagination__indicator--number')
        if not pagination_indicator:
            print(
                f"{self._log_header('#')}\n len(pagination_indicator) is 0.\n"
            )
            return False
        self.pagination_indicators = list(pagination_indicator)
        return True

    def selected_page(self, addi_info=None):
        """Record the text of the selected page indicator.

        On success the text is stored on ``self._selected_page`` and printed
        together with *addi_info*; otherwise a diagnostic is printed.

        Returns:
            ``self`` in every case.
        """
        pagination_pages = self.driver.find_elements_by_class_name(
            'artdeco-pagination__pages--number')
        if len(pagination_pages) != 1:
            print(
                f"{self._log_header('#')}\n len(pagination_pages) is not 1.\n"
            )
            return self

        selected = pagination_pages[0].find_elements_by_class_name('selected')
        if len(selected) != 1:
            print(f"{self._log_header('#')}\n len(selected) is not 1.\n")
            return self

        self._selected_page = selected[0].text
        print(
            f"{self._log_header('=')} : {self._selected_page} ({addi_info})")
        return self

    # Marked for removal in the original source.
    def paginate(self):
        """Click the indicator that follows the currently selected page.

        Returns:
            True when a following indicator was clicked, False otherwise.
        """
        if not self.detect_pagination():
            return False

        self.selected_page("current_page_num")
        if not hasattr(self, 'pagination_indicators'):
            print(
                f"{self._log_header('#')}\n hasattr(self, 'pagination_indicators') is False.\n"
            )
            return False

        # ``selected_page`` may have failed without ever setting the value.
        current = getattr(self, '_selected_page', None)
        click_next = False
        for indicator in self.pagination_indicators:
            if click_next:
                indicator.click()
                self.selected_page("moved_page_num")
                return True
            if current == indicator.find_element_by_tag_name('span').text:
                click_next = True
        return False

    # Marked for removal in the original source.
    def paginate_v1(self, cur_pagenum, last_pagenum, sleepsecs):
        """Older pagination loop driven by explicit page numbers.

        NOTE(review): ``collect_1page`` and ``fr`` are not defined in this
        class; unless they exist at module level this method raises
        NameError. They are kept as-is because their intended source is
        not visible here.

        Args:
            cur_pagenum: Page number to start from.
            last_pagenum: Last page number to process.
            sleepsecs: Seconds to wait at the start of every iteration.
        """
        while cur_pagenum <= last_pagenum:
            time.sleep(sleepsecs)
            pagination = self.driver.find_element_by_tag_name(
                'artdeco-pagination')
            cur_pbutton = pagination.find_element_by_xpath(
                '//li[contains(@class, "active selected")]')
            cur_pagenum = int(
                cur_pbutton.find_element_by_tag_name('span').text)
            print(
                f"{'-'*60}\n page-number progress : {cur_pagenum}/{last_pagenum}"
            )

            collect_1page()

            # Find the button right after the current page and click it.
            page_buttons = pagination.find_elements_by_tag_name('li')
            cur_i = None
            for i, pbutton in enumerate(page_buttons, start=1):
                if pbutton.find_element_by_tag_name('span').text == str(
                        cur_pagenum):
                    cur_i = i
                # Value comparison; ``is`` is unreliable for ints.
                if (cur_i is not None) and (i == cur_i + 1):
                    print(" Go to next page.")
                    pbutton.click()
                    break
            # ``is`` could be False for equal ints above 256, which kept
            # the loop running past the last page.
            if cur_pagenum == last_pagenum:
                print(" This is the last page. Stop.")
                cur_pagenum += 1
                break
            fr.report_mid(addi_info=f" cur_pagenum : {cur_pagenum} 완료.")
        fr.report_fin()

    def collect_job_details(self):
        """Expand, inspect and save the details panel when it is present."""
        if not self.detect_job_details():
            return
        self.click_job_description_see_more(5)
        self.click_applicant_insights_send_feedback(2)
        self.click_company_insights_more_company(2)
        self.debug_re_search(3)
        # Pause as if reading the job details before saving them.
        time.sleep(self.job_details_human_reading_secs)
        self.save_job_details()

    def loop_jobcards(self):
        """Click through every detected job card, parsing and saving each.

        Returns without iterating when no job-card iterator is available.
        """
        self.detect_jobcards()
        if getattr(self, 'jobcards', None) is None:
            # Detection failed before any iterator was ever created.
            return
        while self.jobcards.iterable:
            # Wait before clicking the next job card.
            time.sleep(self.jobcard_click_secs)
            self.jobcards.nextop()
            self.parse_jobcard()
            self.collect_job_details()

    # Marked for removal in the original source.
    def loop_pagination_v1(self, sleepsecs=3):
        """Process job cards page by page until ``paginate()`` returns False.

        Args:
            sleepsecs: Seconds to wait before moving to the next page.
        """
        pg_result = True
        while pg_result:
            self.loop_jobcards()
            # Wait before pressing the next-page button.
            time.sleep(sleepsecs)
            pg_result = self.paginate()

    def loop_pagination(self):
        """Run ``loop_jobcards`` through a ``PaginationIterator``.

        Sleeps ``self.nextpage_click_secs`` seconds after every step.
        """
        pi = PaginationIterator(func=self.loop_jobcards)
        while pi.iterable:
            pi.nextop()
            # Wait before pressing the next-page button.
            time.sleep(self.nextpage_click_secs)
예제 #9
0
class JobCards:
    """Job-card detection, parsing and iteration for a search-results page.

    The methods read ``self.driver``, ``self.jobcard_click_secs`` and
    ``self.collect_job_details``, none of which are defined here; they must
    be supplied by whatever class this one is combined with.
    """

    def __init__(self):
        super().__init__()

    def _whoami(self):
        """Return "<class> | <calling method>" for log lines.

        Must be called directly from the method whose name should appear,
        because the name is read from the immediate caller's stack frame.
        """
        return f"{self.__class__} | {inspect.stack(0)[1].function}"

    def _click(self, webelem, **kwargs):
        """Click *webelem*, ignoring any error; extra keywords are unused."""
        try:
            webelem.click()
        except Exception:
            # Deliberate best-effort: a failed click must not stop the loop.
            pass

    def detect_jobcards(self):
        """Locate the job cards on the page and wrap them in an iterator.

        Three wrapper elements must each match exactly once. At the first
        that does not, a diagnostic is printed and ``self.jobcards_it`` is
        left untouched. Otherwise every ``artdeco-list__item`` element is
        stored on ``self.jobcards_it`` as a ``FunctionIterator`` calling
        ``_click``.

        Returns:
            ``self`` in every case.
        """
        # (label used in the diagnostic, CSS class that must match once)
        wrappers = (
            ('jobcards_container', 'jobs-search-two-pane__results'),
            ('jobcards_contents', 'jobs-search-results'),
            ('jobcard_ul', 'jobs-search-results__list'),
        )
        for label, class_name in wrappers:
            # Value comparison; ``is 1`` relied on CPython int caching.
            if len(self.driver.find_elements_by_class_name(class_name)) != 1:
                print(f"\n len({label}) is not 1.\n")
                return self

        jobcards = self.driver.find_elements_by_class_name(
            'artdeco-list__item')
        self.jobcards_it = FunctionIterator(obj=jobcards,
                                            func=self._click,
                                            dbgon=True,
                                            caller=self._whoami())
        print(f"{'='*60}\n{self._whoami()}\n Job-cards exist.")
        return self

    def parse_jobcard(self):
        """Read title, company name and location from the active job card.

        Each field is set on ``self`` only when its element matches exactly
        once; otherwise a diagnostic is printed and the attribute is left
        as it was.
        """
        active_jobcard = self.driver.find_elements_by_class_name(
            'job-card-search--is-active')
        if len(active_jobcard) != 1:
            # Original message, roughly "this case can never happen".
            print(f"{'#'*60}\n{self._whoami()}\n 이런 경우는 절대 발생할 수 없다.\n")
            return

        jobcard = active_jobcard[0].find_elements_by_class_name(
            'job-card-search__content-wrapper')
        if len(jobcard) != 1:
            print(f"{'#'*60}\n{self._whoami()}\n len(jobcard) is not 1.\n")
            return

        # (attribute on self, label in the diagnostic, CSS class)
        fields = (
            ('title', 'job_title', 'job-card-search__title'),
            ('companyname', 'companyname', 'job-card-search__company-name'),
            ('location', 'location', 'job-card-search__location'),
        )
        for attr, label, class_name in fields:
            found = jobcard[0].find_elements_by_class_name(class_name)
            if len(found) == 1:
                setattr(self, attr, found[0].text)
            else:
                print(
                    f"{'#'*60}\n{self._whoami()}\n len({label}) is not 1.\n")

    def loop_jobcards(self):
        """Click through every detected job card, parsing and collecting each.

        Returns without iterating when no job-card iterator is available.
        """
        self.detect_jobcards()
        if getattr(self, 'jobcards_it', None) is None:
            # Detection failed before any iterator was ever created.
            return
        while self.jobcards_it.iterable:
            # Wait before clicking the next job card.
            time.sleep(self.jobcard_click_secs)
            self.jobcards_it.nextop()
            self.parse_jobcard()
            self.collect_job_details()