# Example #1
class JobFilter(object):
    """Scrape today's job postings from a set of sites and split them into
    accepted jobs and jobs rejected by the title or content filters.

    Each element of *sites* is expected to expose ``get_job_listing_url()``
    and ``get_todays_links(soup)`` -- TODO confirm the site interface against
    the callers; it is not visible in this file.
    """

    def __init__(self, sites):
        self._sites = sites
        self._jobs = []              # jobs that have passed all filters so far
        self._bad_titled_jobs = []   # rejected by the title filter
        self._bad_content_jobs = []  # rejected by the content filter

        self._filter = Filter()

    def get_todays_jobs(self):
        """Run the full pipeline: scrape links, filter titles, fetch posting
        bodies, filter content.

        Returns:
            A 3-tuple ``(good_jobs, bad_content_jobs, bad_titled_jobs)``.
        """
        self._get_todays_links()
        self._filter_on_titles()
        self._get_postings_content()
        self._filter_on_content()
        return (self._jobs, self._bad_content_jobs, self._bad_titled_jobs)

    def _get_todays_links(self):
        """Collect today's Job objects from every configured site.

        Bug fix: the accumulator used to be re-created inside the site loop
        and assigned to ``self._jobs`` only after it, so with more than one
        site only the last site's jobs were kept (and with zero sites the
        final assignment raised UnboundLocalError). The accumulator is now
        hoisted above the loop so all sites contribute.
        """
        jobs = []
        for site in self._sites:
            list_soup = self._beautiful_soupify_url(site.get_job_listing_url())
            links = site.get_todays_links(list_soup)

            for link in links:
                jobs.append(Job(link.string, link['href'], site))

        self._jobs = jobs

    def _filter_on_titles(self):
        """Partition ``self._jobs`` by the title filter.

        Jobs whose title passes stay in ``self._jobs``; the rest move to
        ``self._bad_titled_jobs``.
        """
        get_posting_jobs = []
        for job in self._jobs:
            if self._filter.title(job.get_title()):
                get_posting_jobs.append(job)
            else:
                self._bad_titled_jobs.append(job)

        self._jobs = get_posting_jobs

    def _get_postings_content(self):
        """Fetch and parse the full posting page for each surviving job.

        One HTTP request per job -- presumably acceptable volume; verify
        against expected listing sizes.
        """
        for job in self._jobs:
            content_soup = self._beautiful_soupify_url(job.get_link())
            job.set_content(content_soup)

    def _filter_on_content(self):
        """Partition ``self._jobs`` by the content filter.

        Jobs whose content passes stay in ``self._jobs``; the rest move to
        ``self._bad_content_jobs``.
        """
        good_jobs = []
        for job in self._jobs:
            if self._filter.content(job.get_content()):
                good_jobs.append(job)
            else:
                self._bad_content_jobs.append(job)

        self._jobs = good_jobs

    def _beautiful_soupify_url(self, url):
        """Download *url* and return it parsed as a BeautifulSoup tree.

        NOTE(review): the urllib2 response is never explicitly closed;
        acceptable for a short-lived scraper, but worth wrapping in
        contextlib.closing if this runs long.
        """
        html = urllib2.urlopen(url).read()
        return BeautifulSoup(html)