def get_filtered_jobs(request):
    """Build a Job query narrowed by the request's GET parameters.

    Every GET parameter other than ``exclude_ids`` and
    ``destinatary_or_empty`` is forwarded to ``filter()`` as a keyword
    argument. The two special parameters are handled separately:

    * ``exclude_ids`` -- comma-separated values; jobs whose ``pk`` is in
      that list are excluded.
    * ``destinatary_or_empty`` -- keeps jobs whose ``destinatary`` is the
      given value, ``None`` or the empty string.

    Returns the resulting query object.
    """
    # NOTE(review): filter(**kwargs), exclude() and the ``__in`` lookups
    # look like Django QuerySet calls -- confirm that the object returned
    # by Job.query() actually supports them.
    special_keys = ('exclude_ids', 'destinatary_or_empty')
    query = Job.query()
    params = request.GET

    if params:
        field_filters = {}
        for key in params:
            if key in special_keys:
                continue
            field_filters[key] = params[key]
        query = query.filter(**field_filters)

    excluded = params.get('exclude_ids')
    if excluded:
        query = query.exclude(pk__in=excluded.split(','))

    destinatary = params.get('destinatary_or_empty')
    if destinatary:
        query = query.filter(destinatary__in=(destinatary, None, ''))

    return query
def clear_database():
    """Delete every stored Job so a new search starts from an empty table.

    Only the entity keys are fetched: the entities are about to be
    deleted, so loading their full contents would be wasted work.
    """
    # NOTE(review): assumes Job is an ndb model, as Job.query(), .fetch()
    # and key.delete() suggest -- confirm before relying on keys_only.
    for job_key in Job.query().fetch(keys_only=True):
        job_key.delete()
def post(self):
    """Run a fresh job search on Indeed and Dice and render the results.

    Reads the ``job`` and ``location`` request fields (falling back to
    "Software Engineer" and "San Jose, CA" when blank), deletes the
    previously stored results, fetches one results page from each site,
    stores one ``Job`` entity per parsed listing, and finally renders
    ``views/index.html`` with the stored Dice and Indeed jobs.
    """
    # Imported locally so this method does not depend on the module's
    # import block; the function lives in different modules on Python 2
    # and Python 3.
    try:
        from urllib import quote_plus
    except ImportError:
        from urllib.parse import quote_plus

    def encode_term(term):
        """Return *term* percent-encoded for use as a query-string value."""
        # Encode text to UTF-8 first: Python 2's quote_plus cannot handle
        # non-ASCII unicode strings.
        if not isinstance(term, bytes):
            term = term.encode("utf-8")
        return quote_plus(term)

    def fetch_soup(url):
        """Download *url* and parse it, always closing the connection."""
        response = urlopen(url)
        try:
            return BeautifulSoup(response, "html.parser")
        finally:
            response.close()

    # Clear database of previous results
    clear_database()

    # Example query: defaults to a Software Engineer search in San Jose.
    job = self.request.get("job") or "Software Engineer"
    location = self.request.get("location") or "San Jose, CA"

    # The search terms are only ever placed in URLs, so URL-encoding (not
    # HTML-escaping) is the escaping they need. Spaces become "+" and
    # commas "%2C"; characters such as "&" or "#" no longer break the
    # query string.
    job_param = encode_term(job)
    location_param = encode_term(location)

    indeed_url = "http://www.indeed.com/jobs?q=%s&l=%s" % (job_param, location_param)
    dice_url = "https://www.dice.com/jobs?q=%s&l=%s" % (job_param, location_param)

    indeed_soup = fetch_soup(indeed_url)
    dice_soup = fetch_soup(dice_url)

    # INDEED parsing
    # Indeed flags a rejected search with one of these two divs.
    bad_query = indeed_soup.find_all("div", {"class": "bad_query"})
    invalid_location = indeed_soup.find_all("div", {"class": "invalid_location"})

    if not bad_query and not invalid_location:
        titles = indeed_soup.find_all("a", {"data-tn-element": "jobTitle"})
        # The attrs argument must be a dict; the previous set literal
        # {"class", "company"} was a typo.
        companies = indeed_soup.find_all("span", {"class": "company"})
        places = indeed_soup.find_all("span", {"class": "location"})
        summaries = indeed_soup.find_all("span", {"class": "summary"})
        links = indeed_soup.find_all("a", {"class": "turnstileLink"})

        # The five result lists are paired up positionally; zip stops at
        # the shortest one.
        for title, company, place, summary, link in zip(
                titles, companies, places, summaries, links):
            i_job = Job()
            i_job.title = title.get_text().strip()
            i_job.company = company.get_text().strip()
            i_job.location = place.get_text().strip()
            i_job.description = summary.get_text().encode("utf8").strip()
            i_job.href = link.get("href")
            i_job.site = "indeed"
            i_job.put()

    # DICE parsing
    listings = dice_soup.find_all("div", {"class": "serp-result-content"})
    dice_places = dice_soup.find_all("li", {"class": "location"})

    for listing, place in zip(listings, dice_places):
        link = listing.find("a", {"class": "dice-btn-link"})
        employer = listing.find("li", {"class": "employer"})
        summary = listing.find("div", {"class": "shortdesc"})
        title = link.get("title") if link is not None else None

        # find() returns None when an element is missing; skip incomplete
        # listings instead of failing the whole request.
        if not title or employer is None or summary is None:
            print("Bad search query. Please check your spelling")
            continue

        d_job = Job()
        d_job.title = title.strip()
        d_job.company = employer.get_text().strip()
        d_job.description = summary.get_text().encode("utf8").strip()
        d_job.location = place.get_text()
        d_job.href = link.get("href")
        d_job.site = "dice"
        # Store to database
        d_job.put()

    # Query database for new jobs
    d_jobs = Job.query(Job.site == "dice").fetch()
    i_jobs = Job.query(Job.site == "indeed").fetch()
    self.response.out.write(
        template.render('views/index.html', {'d_jobs': d_jobs, 'i_jobs': i_jobs}))