Beispiel #1
0
    def parse(self, task: Task):
        """Walk the paginated search results for ``task`` and save one Profile per link.

        For every result page the search URL is built with
        ``build_search_url``, the page is downloaded and parsed, and each link
        returned by ``get_links`` becomes a ``Profile`` that is filled in by
        ``fill_profile`` and saved. The number of saved profiles is written to
        ``task.total_found``; the task itself is not saved here.

        Args:
            task: search task supplying ``keyword``, ``segment`` and ``site``.
        """
        next_page = 0
        page_count = 0
        saved = 0
        keep_going = True
        while keep_going:
            search_url = self.build_search_url(task, next_page)
            next_page += 1
            soup = BeautifulSoup(requests.get(search_url).content, BS_PARSER)
            profile_links = self.get_links(soup)

            # The page count is looked up on the first page and looked up
            # again on later pages for as long as it is still falsy.
            if not page_count:
                page_count = self.count_pages(soup)

            for profile_url in profile_links:
                found = Profile(
                    keyword=task.keyword,
                    segment=task.segment,
                    site=task.site,
                    task=task,
                )
                self.fill_profile(found, profile_url)
                found.save()
                saved += 1

            # NOTE(review): the counter was already incremented above, so with
            # zero-based page numbers this may request one page past the end;
            # confirm against count_pages / build_search_url.
            keep_going = next_page <= page_count
        task.total_found = saved
Beispiel #2
0
    def sync_regions(self) -> None:
        """Replace the stored regions of the CLIENT_SUPREJOB site with fresh API data.

        Downloads ``URL_REGIONS``, takes the first country of the payload,
        deletes every ``RegionDict`` row of the site and then stores the
        country and each of its regions through ``save_town``.

        The response is fully validated before anything is deleted, so a
        failed or empty download leaves the existing rows untouched.

        Raises:
            HTTPError: if the response status is 400 or above, or the payload
                is empty.
        """
        response = get(URL_REGIONS)

        # Check the status first: an error response is not guaranteed to carry
        # a JSON body, and 400 itself is already an error.
        if response.status_code >= 400:
            raise HTTPError(
                'regions request failed with status {}'.format(response.status_code)
            )

        countries = response.json()
        if not countries:
            raise HTTPError('regions response is empty')

        # Only the first country is imported (Russia, per the original note).
        country = countries[0]
        country_id = int(country['id'])
        country_name = country['title']

        site = Site.objects.filter(title=CLIENT_SUPREJOB).first()
        # Drop whatever is currently stored for this site.
        RegionDict.objects.filter(site=site).delete()

        self.save_town(country, site, country_id, country_name)
        # A missing or null 'regions' entry simply means there is nothing more to save.
        for region in country.get('regions') or []:
            self.save_town(region, site, country_id, country_name,
                           region['id'], region['title'])
Beispiel #3
0
    def refresh_credentials(self) -> None:
        """Obtain a new access token unless the current one is valid for 10+ seconds.

        When ``code``, ``access_token`` and ``refresh_token`` are all non-empty
        the token is renewed through ``URL_REFRESH`` using the refresh token;
        otherwise a new one is requested from ``URL_TOKEN`` with the site's
        login and password. On success ``access_token``, ``refresh_token`` and
        ``token_valid_till`` are overwritten from the response.

        Raises:
            HTTPError: if ``validate_response`` rejects the response; the
                message is the string form of ``self.errors``.
        """
        seconds_left = self.token_valid_till - time.time()
        if seconds_left > 10:
            return

        # Any empty credential means there is nothing to refresh from.
        lacks_token = '' in (self.code, self.access_token, self.refresh_token)
        query = {
            'client_id': self.site.app_id,
            'client_secret': self.site.app_secret,
            'hr': 1,
        }
        if lacks_token:
            query.update({'login': self.site.login, 'password': self.site.password})
            endpoint = self.URL_TOKEN
        else:
            query['refresh_token'] = self.refresh_token
            endpoint = self.URL_REFRESH

        reply = get(endpoint, params=query)
        if not validate_response(reply, self.errors):
            raise HTTPError(str(self.errors))

        payload = reply.json()
        self.access_token = payload['access_token']
        self.refresh_token = payload['refresh_token']
        # 'ttl' is compared against time.time() above, i.e. it is treated as an
        # absolute epoch timestamp rather than a duration.
        self.token_valid_till = int(payload['ttl'])
Beispiel #4
0
    def api_search(self, task: Task) -> None:
        """Page through the resume search API and save a Profile per returned object.

        Requests ``URL_RESUMES_SEARCH`` with the parameters from
        ``build_search_params`` and stores every entry of the response's
        ``objects`` list as a ``Profile``. Paging continues while the response
        reports ``more`` and fewer than ``task.limit`` profiles were saved; at
        most ``task.limit`` profiles are saved in total.

        On the first successful response ``self.total_profiles`` and
        ``task.total_found`` are set from the response's ``total`` field.
        If ``validate_response`` rejects a response the method returns
        silently, leaving the details in ``self.errors``.

        Args:
            task: search task supplying ``limit``, ``keyword`` and ``segment``.
        """
        profiles_scanned = 0
        params = self.build_search_params(task)

        while profiles_scanned < task.limit:
            response = get(self.URL_RESUMES_SEARCH, params=params, headers=self.api_headers)

            if not validate_response(response, self.errors):
                return

            response_json = response.json()

            if self.total_profiles == 0:
                self.total_profiles = response_json.get('total', 0)
                task.total_found = self.total_profiles
                print('total: {}'.format(self.total_profiles))

            for p in response_json.get('objects') or []:
                # Enforce the limit inside a page too, so the last page
                # cannot push the saved count past task.limit.
                if profiles_scanned >= task.limit:
                    return

                # 'town' may be absent or null; both mean "no city".
                town = p.get('town') or {}
                Profile(
                    site=self.site,
                    link=p.get('link', ''),
                    resume_id=p.get('id', ''),
                    name=p.get('firstname', ''),
                    lastname=p.get('lastname', ''),
                    outer_id=p.get('id_user', ''),
                    city=town.get('title', ''),
                    info=str(p),
                    keyword=task.keyword,
                    task=task,
                    segment=task.segment,
                ).save()
                profiles_scanned += 1

            if not response_json.get('more', False):
                break
            # Tolerate search params without an explicit page by treating the
            # page just fetched as page 0.
            params['page'] = params.get('page', 0) + 1
Beispiel #5
0
 def fill_profile(profile: Profile, url):
     """Download ``url`` and copy the resume wrapper's contents into ``profile.info``.

     The page is parsed with ``BS_PARSER`` and searched for the first
     ``div`` with class ``resume-wrapper``. ``profile.info`` receives that
     element's ``contents``, or an empty string when no such element exists.
     The profile is not saved here.
     """
     page = requests.get(url)
     soup = BeautifulSoup(page.content, BS_PARSER)
     wrapper = soup.find('div', attrs={'class': 'resume-wrapper'})
     if wrapper is None:
         profile.info = ''
     else:
         profile.info = wrapper.contents