def main(keyword=None, location=None):
    """Import job postings from the Dice search API into the local database.

    Queries the Dice "simple" job-search endpoint for ``keyword`` and, for
    every entry of the response's ``resultItemList``, stores a
    ``models.Job`` identified by ``(platform, platform_job_id)``.

    Args:
        keyword: Free-text search term. It is URL-encoded before being put
            into the query string; ``None`` is sent as an empty search text.
        location: Accepted for interface compatibility; this importer does
            not use it.

    Returns:
        ``1`` when the server answers with a non-200 status, ``None``
        otherwise.
    """
    # Function-scope import so this block does not depend on the module's
    # import header.
    from urllib.parse import quote_plus

    search_text = "" if keyword is None else str(keyword)
    h = httplib2.Http()
    resp, data = h.request(
        "http://service.dice.com/api/rest/jobsearch/v1/simple.json?text=%s"
        % quote_plus(search_text),
        "GET",
    )
    if resp.get("status") != "200":
        log.warning("%s: server error %s" % (__file__, resp.get("status")))
        return 1

    parser_json = json.loads(data.decode("utf-8"))

    # The platform name is derived from this file's name, so it is the same
    # for every item; look it up once instead of once per item.
    platform, _ = models.Platform.objects.get_or_create(
        name=os.path.basename(__file__).replace(".py", "")
    )

    # A missing or null result list is treated as "no results".
    for item in parser_json.get("resultItemList") or []:
        company = update_company(item["company"])

        # The job id is the path segment(s) following "/result/" in the
        # detail URL.
        match = re.search(r"/result/([\d\S-]+)/", item.get("detailUrl") or "")
        if not match:
            log.warning("job id not found %s" % item.get("detailUrl"))
            continue
        job_id = match.group(1)

        try:
            job = models.Job.objects.get(platform=platform, platform_job_id=job_id)
        except models.Job.DoesNotExist:
            job = models.Job()

        # NOTE(review): the fields below are (re)assigned for existing jobs
        # as well as new ones; confirm that refreshing existing rows is the
        # intended behaviour.
        job.company = company
        job.position, _ = models.Position.objects.get_or_create(name=item["jobTitle"])
        job.location, _ = models.Location.objects.get_or_create(place=item["location"])
        job.platform = platform
        job.platform_job_id = job_id
        job.save()
def _split_address(address):
    """Split an address on ``', '`` into ``(street, place)``.

    The last segment is the place; everything before it, re-joined with
    ``', '``, is the street (empty when there is only one segment).
    """
    parts = address.split(', ')
    return ', '.join(parts[:-1]), parts[-1]


def _normalize_experience(text):
    """Strip the Vietnamese wording around a years-of-experience value."""
    # ' năm' = "years", 'Dưới ' = "under", 'Hơn ' = "more than".
    for wording in (' năm', 'Dưới ', 'Hơn '):
        text = text.replace(wording, '')
    # 'Chưa có kinh nghiệm' ("no experience yet") becomes 0.
    return text.replace('Chưa có kinh nghiệm', '0')


def _parse_salary(text):
    """Return the lower bound of a ``'<low> - <high> triệu'`` salary, else None."""
    match = re.search(r'([\d]+) - ([\d]+) triệu$', text)
    return int(match.group(1)) if match else None


def main(keyword=None, location=None):
    """Scrape vieclam24h.vn search results into the local database.

    Fetches the search page for ``keyword``, follows every result link
    (``<a class="text_grey2">``), reads the job attributes from the detail
    page and creates a ``models.Job`` when no job with the same
    ``(platform, platform_job_id)`` exists yet. Positions, categories,
    locations and companies are looked up or created along the way.

    Args:
        keyword: Free-text search term. It is URL-encoded before being put
            into the URL; ``None`` is sent as an empty search text.
        location: Accepted for interface compatibility; this scraper does
            not use it.

    Returns:
        ``1`` when the search page answers with a non-200 status, ``None``
        otherwise.
    """
    # Function-scope import so this block does not depend on the module's
    # import header.
    from urllib.parse import quote_plus

    search_text = '' if keyword is None else str(keyword)
    h = httplib2.Http(".cache")
    # NOTE(review): the first parameter is attached with '&' rather than '?'
    # (an earlier variant used '?hdn_nganh_nghe_cap1=&hdn_dia_diem=&...').
    # The URL shape is kept as-is; confirm the site accepts it.
    resp, data = h.request(
        'https://vieclam24h.vn/tim-kiem-viec-lam-nhanh/&hdn_tu_khoa=%s'
        % quote_plus(search_text),
        'GET',
    )
    if resp.get('status') != '200':
        log.warning('%s: server error %s' % (__file__, resp.get('status')))
        return 1

    country = models.Country.objects.get(iso='VN')
    # The platform name is derived from this file's name, so it is the same
    # for every item; look it up once instead of once per item.
    platform, _ = models.Platform.objects.get_or_create(
        name=os.path.basename(__file__).replace('.py', '')
    )

    soup = BeautifulSoup(data, 'html.parser')
    for item in soup.findAll('a', {'class': 'text_grey2'}):
        href = item.get('href')

        # ``.string`` is None when the anchor has no single text child.
        title = item.string
        if not title:
            log.warning('job title not found %s' % href)
            continue
        position, _ = models.Position.objects.get_or_create(name=title.strip())

        match = re.search(r'id([\d]+)\.html', href or '')
        if not match:
            log.warning('job id not found %s' % href)
            continue
        job_id = match.group(1)

        resp, data = h.request(href, 'GET')
        if resp.get('status') != '200':
            log.warning('%s: server error %s (%s)' % (__file__, resp.get('status'), href))
            continue
        details = BeautifulSoup(data, 'html.parser')

        # Each <p class="line-icon"> row carries one attribute; the first CSS
        # class of its <i> icon selects the Job field through DATA_MAP.
        job_fields = {}
        for detail in details.findAll('p', {'class': 'line-icon'}):
            icon = detail.find('i')
            icon_classes = icon.get('class') if icon is not None else None
            field = DATA_MAP.get(icon_classes[0]) if icon_classes else None
            if field is None:
                # Rows without a DATA_MAP entry are ignored; they are never
                # stored, so no placeholder key has to be removed later.
                continue
            value_tag = detail.find('span', {'class': 'job_value'})
            if value_tag is None:
                value_tag = detail.find('a', {'class': 'job_value'})
            # NOTE(review): a row without a value is stored as None, which
            # is then passed on to Job.objects.create — confirm the model
            # accepts that.
            job_fields[field] = value_tag.string if value_tag is not None else None

        address_tag = details.find('address')
        address = address_tag.string if address_tag is not None else None
        if not address:
            log.warning('job location not found %s' % href)
            continue

        # The category belongs to the position, not to the job itself.
        category, _ = models.Category.objects.get_or_create(
            name=job_fields.pop('category', None) or 'test'
        )
        # NOTE(review): ``position`` is not saved in this function, so this
        # assignment is only persisted if something else saves the Position.
        position.category = category
        job_fields['position'] = position

        # Normalise before the lookup so the filter uses exactly the values
        # that get stored; otherwise padded or over-long addresses never
        # match and a duplicate Location is created on every run.
        street, place = _split_address(address)
        place = place.strip()
        street = street[:100]  # presumably the street column's max length
        job_locations = models.Location.objects.filter(
            place=place, street=street, country=country
        )
        if job_locations:
            job_location = job_locations[0]
        else:
            job_location = models.Location()
            job_location.place = place
            job_location.street = street
            job_location.country = country
            job_location.save()
        job_fields['location'] = job_location

        if job_fields.get('years_of_experience'):
            job_fields['years_of_experience'] = _normalize_experience(
                job_fields['years_of_experience']
            )

        company_tag = details.find('h3', {'class': 'font18'})
        if company_tag is None:
            log.warning('job company not found %s' % href)
            continue
        job_fields['company'] = update_company(company_tag.string)

        if job_fields.get('salary'):
            salary = _parse_salary(job_fields['salary'])
            if salary is None:
                # Unrecognised salary text is dropped rather than stored.
                del job_fields['salary']
            else:
                job_fields['salary'] = salary

        # 'Toàn thời gian cố định' ("fixed full-time") is stored as 40 hours.
        if job_fields.get('hours_per_week') == 'Toàn thời gian cố định':
            job_fields['hours_per_week'] = 40

        # Only create the job when it is not known yet; existing jobs are
        # left untouched.
        try:
            models.Job.objects.get(platform=platform, platform_job_id=job_id)
        except models.Job.DoesNotExist:
            job_fields['platform'] = platform
            job_fields['platform_job_id'] = job_id
            models.Job.objects.create(**job_fields)