def post(self):
    """Build a Job entity from the posted form fields, save it, and echo it as JSON."""
    job = Job()
    # Every field comes straight off the request under the same name.
    for field in ("title", "description", "salary", "location", "industry",
                  "contact_phone", "job_type", "company", "contact_name",
                  "contact_email"):
        setattr(job, field, self.request.get(field))
    job.put()
    # Serialize the listed properties back to the caller.
    self.response.out.write(
        job.to_json("title", "is_deleted", "is_active", "is_starred",
                    "when_created"))
def post(self):
    """Persist a new active Job from the submitted form, then return to the listing page."""
    job = Job()
    # Form field names map one-to-one onto Job attributes.
    for name in ('title', 'description', 'salary', 'location', 'industry',
                 'contact_phone', 'job_type', 'company', 'contact_name',
                 'contact_email'):
        setattr(job, name, self.request.get(name))
    # New postings start out visible.
    job.is_active = True
    job.put()
    self.redirect('/alumni/jobs/')
def save_job(self, html_doc):
    """Parse a job-detail HTML page and persist it as a Job, then save its company.

    html_doc: raw HTML string of the job-detail page.

    Fixes vs. original: removed the unused local ``txt1_tag``; replaced the
    ``for … break`` take-first idiom with a simple truthiness check; paired the
    label/value cells with ``zip`` so a short value column can no longer raise
    IndexError; replaced the if/if/if label chain with a lookup table.
    """
    job = Job()
    parser = etree.HTMLParser()
    tree = etree.parse(StringIO(html_doc), parser)

    # Title: first matching text node, if any.
    title = tree.xpath('//td[@class=\'sr_bt\']/text()')
    if title:
        job.title = title[0]

    # Job description: first text node of the detail div, if any.
    job_detail = tree.xpath('//td[contains(@class, \'txt_4 wordBreakNormal job_detail\')]/div/text()')
    if job_detail:
        job.detail = job_detail[0]

    # Welfare labels accumulate onto the entity's collection.
    welfare = tree.xpath('//span[contains(@class, \'Welfare_label\')]/text()')
    for w in welfare:
        job.welfare.add(w)

    # Date / location / salary live in paired txt_1 (label) / txt_2 (value) cells.
    txt1 = tree.xpath('//table[contains(@class, \'jobs_1\')]/tr/td[contains(@class, \'txt_1\')]')
    txt2 = tree.xpath('//table[contains(@class, \'jobs_1\')]/tr/td[contains(@class, \'txt_2\')]')
    # Label text -> Job attribute ("publish date", "work location", "salary range").
    label_to_attr = {
        '发布日期:': 'date',
        '工作地点:': 'location',
        '薪水范围:': 'salary',
    }
    for label, value in zip(txt1, txt2):
        # Stop at the first blank label cell, as the original did.
        if len(label.text.lstrip()) == 0:
            break
        attr = label_to_attr.get(label.text)
        if attr is not None:
            setattr(job, attr, value.text)

    job.save()
    # need for speed
    self.save_company(tree)
def post(self):
    """Scrape job listings from Indeed and Dice for the requested job/location,
    store each hit as a Job entity, and render the combined results page.

    Fixes vs. original: ``{"class", "company"}`` was a SET literal (comma typo)
    where BeautifulSoup needs an attrs DICT, so the Indeed company lookup
    matched on the wrong criteria; removed a leftover Python-2 debug
    ``print t``; the dice loop no longer shadows the outer ``job`` query
    string; the redundant second ``Job()`` construction and the repeated
    ``find("a", {"class": "dice-btn-link"})`` lookups are gone.
    """
    # Clear database of previous results
    clear_database()

    job = cgi.escape(self.request.get("job"))
    location = cgi.escape(self.request.get("location"))

    # Default query: Software Engineer in San Jose.
    if not location:
        location = "San Jose, CA"
    if not job:
        job = "Software Engineer"

    # URL-encode the query for both sites: '+' for spaces, '%2C' for commas.
    indeed_job = job.replace(" ", "+")
    indeed_loc = location.replace(" ", "+").replace(",", "%2C")
    dice_job = job.replace(" ", "+")
    dice_loc = location.replace(" ", "+").replace(",", "%2C")

    # Base indeed & dice URLs with the user's query substituted in.
    indeed_url = "http://www.indeed.com/jobs?q=%s&l=%s" % (indeed_job, indeed_loc)
    dice_url = "https://www.dice.com/jobs?q=%s&l=%s" % (dice_job, dice_loc)

    # Fetch and parse both result pages.
    indeed_soup = BeautifulSoup(urlopen(indeed_url), "html.parser")
    dice_soup = BeautifulSoup(urlopen(dice_url), "html.parser")

    # INDEED flags a failed search with these divs.
    bad_query = indeed_soup.find_all("div", {"class": "bad_query"})
    invalid_location = indeed_soup.find_all("div", {"class": "invalid_location"})

    # NOTE(review): the original collapsed source is ambiguous about whether the
    # dice parsing sat inside this branch; the trailing comment ("bad query for
    # either indeed or dice") suggests it did, so both scrapes are guarded here.
    if len(bad_query) == 0 and len(invalid_location) == 0:
        # Title, company, location, description, and link for each Indeed hit.
        titles = indeed_soup.find_all("a", {"data-tn-element": "jobTitle"})
        companies = indeed_soup.findAll("span", {"class": "company"})
        loc = indeed_soup.find_all("span", {"class": "location"})
        desc = indeed_soup.find_all("span", {"class": "summary"})
        jobURLS = indeed_soup.find_all("a", {"class": "turnstileLink"})

        for t, c, l, d, h in zip(titles, companies, loc, desc, jobURLS):
            if t:
                i_job = Job()
                i_job.title = t.get_text().strip()
                i_job.company = c.get_text().strip()
                i_job.location = l.get_text().strip()
                i_job.description = d.get_text().encode("utf8").strip()
                i_job.href = h.get("href")
                i_job.site = "indeed"
                i_job.put()

        # DICE: one result card per job, locations listed in parallel.
        dice_jobs = dice_soup.findAll('div', {'class': 'serp-result-content'})
        locations = dice_soup.find_all("li", {"class": "location"})
        for card, loc_el in zip(dice_jobs, locations):
            link = card.find("a", {"class": "dice-btn-link"})
            title = link.get("title")
            if title:
                d_job = Job()
                d_job.title = title.strip()
                d_job.company = card.find("li", {"class": "employer"}).get_text().strip()
                desc_text = card.find("div", {"class": "shortdesc"}).get_text().encode("utf8")
                d_job.description = str(desc_text).strip()
                d_job.location = loc_el.get_text()
                d_job.href = link.get('href')
                d_job.site = "dice"
                # Store to database
                d_job.put()
    else:
        print("Bad search query. Please check your spelling")

    # Query database for the jobs just stored and render them.
    d_jobs = Job.query(Job.site == "dice").fetch()
    i_jobs = Job.query(Job.site == "indeed").fetch()
    self.response.out.write(template.render('views/index.html',
                                            {'d_jobs': d_jobs, 'i_jobs': i_jobs}))