Beispiel #1
0
 def get_jobs(self):
     """Scrape every search-result page and return the vacancies found.

     ``self.get_max_page()`` is called first; pages ``0`` through
     ``self.max_page - 1`` are then fetched with ``self.request_page``.
     A card lacking one of the optional elements (location, company,
     salary, description) gets an empty string for that field instead of
     aborting the whole scrape; a card without any link is skipped.

     Returns:
         list: one ``Vacancy`` per usable ``div.vacancy-serp-item`` card.
     """
     def text_of(node):
         # ``find`` returns None when nothing matches, so guard ``.text``.
         return '' if node is None else node.text

     jobs = []
     self.get_max_page()
     for page_number in range(self.max_page):
         page_soup = self.request_page(page_number=page_number)
         for vacancy in page_soup.find_all('div', 'vacancy-serp-item'):
             anchor = vacancy.find('a')
             if anchor is None:
                 # Without the first link there is neither title nor URL.
                 continue

             # Only the part before the first comma is kept as location.
             location = text_of(
                 vacancy.find('span', 'vacancy-serp-item__meta-info')
             ).split(',')[0]
             company = text_of(
                 vacancy.find('div', 'vacancy-serp-item__meta-info-company'))

             sidebar = vacancy.find('div', 'vacancy-serp-item__sidebar')
             salary = text_of(
                 sidebar.find('span') if sidebar is not None else None)

             # ``select`` returns an empty list when the snippet is absent.
             snippets = vacancy.select(
                 'div[data-qa="vacancy-serp__vacancy_snippet_responsibility"]'
             )
             description = snippets[0].text if snippets else ''

             jobs.append(
                 Vacancy(title=anchor.text,
                         link=anchor.get('href', ''),
                         location=location,
                         company=company,
                         salary=salary,
                         description=description))
     return jobs
 def get_jobs(self):
     """Scrape every result page and return the job postings found.

     ``self.get_max_page()`` is called first; pages ``1`` through
     ``self.max_page`` are then fetched with ``self.request_page``.
     Cards without a linked ``<h2>`` heading are skipped; a missing
     company or location becomes an empty string. Salary and
     description are always empty strings.

     Returns:
         list: one ``Vacancy`` per usable ``div.-job`` card.
     """
     from urllib.parse import urljoin

     jobs = []
     self.get_max_page()
     for page_number in range(1, self.max_page + 1):
         page_soup = self.request_page(page_number=page_number)
         for vacancy in page_soup.find_all('div', {'class': '-job'}):
             heading = vacancy.find('h2')
             anchor = heading.find('a') if heading is not None else None
             if anchor is None:
                 # No heading link means no title/URL to record.
                 continue

             # urljoin avoids the double slash that plain concatenation
             # produces for root-relative hrefs and keeps absolute hrefs
             # intact.
             link = urljoin('https://stackoverflow.com/',
                            anchor.get('href', ''))

             subheading = vacancy.find('h3')
             if subheading is not None:
                 # Direct child spans only: first is company, second is
                 # location.
                 company_data = subheading.find_all('span', recursive=False)
             else:
                 company_data = []
             company = company_data[0].text if company_data else ''
             location = company_data[1].text if len(company_data) > 1 else ''

             jobs.append(
                 Vacancy(title=heading.text,
                         link=link,
                         location=location,
                         company=company,
                         salary='',
                         description=''))
     return jobs
Beispiel #3
0
                                }
                            companydb.insert(new_company_info)
                            company_object_id = companydb.find_one(
                                {"name": company})
                            company_object_id = company_object_id["_id"]
                            print(company_object_id)
                            print("done")
                        else:
                            company_object_id = companydb.find_one(
                                {"name": company})
                            company_object_id = company_object_id["_id"]
                            print("Company already exists: ",
                                  company_object_id)

                        # Vacancy User
                        vacancy_returned = Vacancy(vacancy_link, location_id,
                                                   cookies)
                        if vacancy_returned["Email"] == "":
                            if vacancy_returned["Phone_Number"] == "":
                                user_object_id = 100000000000000000000000
                            else:
                                check = userdb.find_one({
                                    "phone":
                                    vacancy_returned["Phone_Number"]
                                })
                                if check is None:
                                    phone = vacancy_returned["Phone_Number"]
                                    add = phone.split(" ", 1)
                                    code = add[0]
                                    code = code.replace("+", "")
                                    number = add[1]
                                    number = number.replace(" ", "")
Beispiel #4
0
                try:
                    logo = Selector(response=page).xpath(f'/html/body/div[{div}]/div/img/@src').get()
                    logo = "https://www.worknet.am" + logo
                except:
                    logo = ""


                data = {
                    "company" : company,
                    "c_link" : c_link,
                    "position" : position,
                    "v_link" : v_link,
                    "logo" : logo
                }

                returned = Vacancy(v_link)

                print(data)

                
                # COMPANY DATA
                # Check if company already exists in a collection
                check = companydb.find_one({"name" : company})
                if check is None:
                    new_company_info = {
                        "name" : company,
                        "url" : c_link,
                        "industry" : "1",
                        "logo" : logo,
                        "created_at" : datetime.datetime.utcnow(),
                        "websites" : returned["website"],
Beispiel #5
0
                                "custom_button_title": "Visit",
                                "custom_button_url": c_link
                            },
                            "country": "GE"
                        }
                    companydb.insert(new_company_info)
                    company_object_id = companydb.find_one({"name": company})
                    company_object_id = company_object_id["_id"]
                    print(company_object_id)
                    print("done")
                else:
                    company_object_id = companydb.find_one({"name": company})
                    company_object_id = company_object_id["_id"]
                    print("Company already exists: ", company_object_id)

                vacancy_returned = Vacancy(v_link, cookies)

                # Vacancy User
                if vacancy_returned["email"] == "":
                    user_object_id = 100000000000000000000000
                else:
                    check = userdb.find_one(
                        {"email": vacancy_returned["email"]})
                    if check is None:
                        new_user_info = {
                            "email": vacancy_returned["email"],
                            "company_id": ObjectId(f"{company_object_id}"),
                            "created_at": datetime.datetime.utcnow()
                        }
                        userdb.insert(new_user_info)
                        user_object_id = userdb.find_one(
Beispiel #6
0
                               attrs={
                                   'data-qa': 'vacancy-serp__vacancy-employer'
                               }).text
            requirement = div.find(
                "div",
                attrs={
                    'data-qa': 'vacancy-serp__vacancy_snippet_requirement'
                }).text
            salary = div.find('div',
                              attrs={
                                  'data-qa':
                                  'vacancy-serp__vacancy-compensation'
                              }).text
            vac = Vacancy(title=title,
                          href=href,
                          company=company,
                          requirement=requirement,
                          salary=salary)
            vacancy_list.append(vac)

        except:
            pass

    for i in vacancy_list:
        print(i.get_title(), i.get_salary(), i.get_requirement())

    data2 = {
        "title": [x.get_title() for x in vacancy_list],
        "href": [x.get_href() for x in vacancy_list],
        "company": [x.get_company() for x in vacancy_list],
        "requirement": [x.get_requirement() for x in vacancy_list],
Beispiel #7
0

# Fetch the search-result page and persist every vacancy card found on it.
session = requests.Session()
req = session.get(url, headers=headers)


if req.status_code == 200:
    soup = bs(req.content, "html.parser")
    divs = soup.find_all("div", attrs={"data-qa": "vacancy-serp__vacancy"})
    for div in divs:
        title_tag = div.find(
            "a", attrs={"data-qa": "vacancy-serp__vacancy-title"})
        company_tag = div.find(
            "a", attrs={"data-qa": "vacancy-serp__vacancy-employer"})
        salary_tag = div.find(
            "div", attrs={"data-qa": "vacancy-serp__vacancy-compensation"})
        # ``find`` returns None for a missing element; cards lacking any of
        # the three fields are skipped, as before, but now explicitly.
        if title_tag is None or company_tag is None or salary_tag is None:
            continue

        title = title_tag.text
        company = company_tag.text
        salary = salary_tag.text

        try:
            vac = Vacancy(text_request, title, company, salary)
            vac.salaryParse()
            vac.save_to_db()
        except Exception as exc:
            # Stay best-effort (one bad card must not stop the run), but
            # report the failure and let KeyboardInterrupt/SystemExit
            # propagate instead of swallowing them.
            print("ERROR!", title, exc)
else:
    print("ERROR!", req.status_code)