예제 #1
0
def glassdoor_jobs(url):
    """Scrape a Glassdoor results page and append new listings to ``jobs``.

    Fetches ``url``, turns every job card (``li`` elements matched by the
    class string ``jl react-job-listing gdGrid``) into a ``Listing`` and
    appends ``listing.to_dict()`` to the ``jobs`` list defined outside this
    function, unless an equal entry is already present.

    A card that lacks a title, company or link element is skipped so that a
    single malformed card does not abort the whole scrape. A missing salary
    falls back to ``"Not Listed"`` and a missing location to ``"US"``.

    Args:
        url: Address of the Glassdoor results page to fetch.

    Returns:
        None. Results are accumulated in ``jobs``.
    """
    req = requests.get(url, headers={"User-agent": "job_bot 1.0"})
    soup = BeautifulSoup(req.content, "html.parser")

    for tag in soup.find_all("li", class_="jl react-job-listing gdGrid"):
        # Resolve every required element first; ``find`` and attribute-style
        # navigation both yield None when the markup is missing.
        title_link = tag.find(
            "a", class_="jobInfoItem jobTitle css-13w0lq6 eigr9kq1 jobLink"
        )
        title_span = title_link.span if title_link is not None else None

        header = tag.find(
            "div",
            class_="jobHeader d-flex justify-content-between align-items-start"
        )
        company_link = header.a if header is not None else None
        company_span = company_link.span if company_link is not None else None

        job_link = tag.find("a", class_="jobLink")
        href = job_link.get("href") if job_link is not None else None

        if title_span is None or company_span is None or href is None:
            # No sensible default exists for these fields: skip the card.
            continue

        listing = Listing()
        title = title_span.text
        # Titles longer than 40 characters are cut to 35 plus an ellipsis.
        listing.title = title[:35] + "..." if len(title) > 40 else title
        listing.company = company_span.text

        salary = tag.find("div", class_="salaryEstimate ")
        listing.salary = (
            salary.span.span.text if salary is not None else "Not Listed"
        )

        location = tag.find("div",
                            class_="d-flex flex-wrap css-yytu5e e1rrn5ka1")
        listing.location = location.span.text if location is not None else "US"

        # Hard-coded age label; presumably the URL is already filtered to
        # the last 24 hours -- verify against the caller.
        listing.date = "24hr"
        listing.link = "https://www.glassdoor.com" + href
        listing.logo = "https://www.adweek.com/agencyspy/wp-content/uploads/sites/7/2016/01/glassdoor.jpg"

        # NOTE(review): ``jobs`` holds dicts (see the append below) while the
        # left operand is a Listing, so de-duplication only works if
        # Listing.__eq__ accepts dicts -- confirm.
        if not any(listing == job for job in jobs):
            jobs.append(listing.to_dict())
예제 #2
0
def indeed_jobs(url):
    """Scrape an Indeed results page and append new listings to ``jobs``.

    Fetches ``url``, reads every ``div`` job card with class
    ``jobsearch-SerpJobCard`` and, for each title accepted by
    ``listing_filter``, builds a ``Listing`` whose ``to_dict()`` is appended
    to the ``jobs`` list defined outside this function, unless an equal entry
    is already present.

    The listing link is a Google search for the job; its query is
    URL-encoded so that spaces, ``&``, ``#``, ``+`` or ``,`` in the title,
    company or location cannot corrupt the URL. A card that lacks a title,
    company or location element is skipped so that a single malformed card
    does not abort the whole scrape. A missing salary falls back to
    ``"Not Listed"``.

    Args:
        url: Address of the Indeed results page to fetch.

    Returns:
        None. Results are accumulated in ``jobs``.
    """
    from urllib.parse import quote_plus  # only needed for the search link

    req = requests.get(url, headers={"User-agent": "job_bot 1.0"})
    soup = BeautifulSoup(req.content, "html.parser")

    for tag in soup.find_all("div", class_="jobsearch-SerpJobCard"):
        # ``find`` and attribute-style navigation yield None when the markup
        # is missing, so walk the chain one step at a time.
        heading = tag.find("h2", class_="title")
        title_link = heading.a if heading is not None else None
        raw_title = title_link.get("title") if title_link is not None else None
        if raw_title is None:
            continue

        title = raw_title.strip()
        if not listing_filter(title):
            continue

        company_box = tag.find("div", class_="sjcl")
        company_div = company_box.div if company_box is not None else None
        company_span = company_div.span if company_div is not None else None

        location_div = tag.find("div", class_="recJobLoc")
        location = (
            location_div.get("data-rc-loc") if location_div is not None else None
        )

        if company_span is None or location is None:
            # No sensible default exists for these fields: skip the card.
            continue

        listing = Listing()
        # Titles longer than 40 characters are cut to 35 plus an ellipsis.
        listing.title = title[:35] + "..." if len(title) > 40 else title
        listing.company = company_span.text.lstrip()

        salary = tag.find("span", class_="salaryText")
        listing.salary = (
            salary.text.lstrip() if salary is not None else "Not Listed"
        )
        listing.location = location

        # The listing is stamped with yesterday's date.
        # NOTE(review): '%y' produces a two-digit year (e.g. "20-05-03");
        # confirm that '%Y' was not intended.
        listing.date = (date.today() -
                        timedelta(days=1)).strftime('%y-%m-%d')

        # The full (untruncated) title is used for the search; quote_plus
        # turns spaces into '+' and percent-encodes reserved characters.
        query = quote_plus(
            f"{title} {listing.company} {listing.location} {listing.date} job opening"
        )
        listing.link = f"https://www.google.com/search?q={query}"
        listing.logo = "https://is2-ssl.mzstatic.com/image/thumb/Purple118/v4/ab/03/b8/ab03b82b-12cf-ce7c-249f-b54a8f01c1b9/AppIcon-1x_U007emarketing-85-220-0-6.png/246x0w.jpg"

        # NOTE(review): ``jobs`` holds dicts (see the append below) while the
        # left operand is a Listing, so de-duplication only works if
        # Listing.__eq__ accepts dicts -- confirm.
        if not any(listing == job for job in jobs):
            jobs.append(listing.to_dict())