예제 #1
0
파일: contact.py 프로젝트: mylove1/spiders
    def __parse_resume_details(self, response_datas):
        """Convert the coded fields of a resume payload via ``Keywords``.

        ``response_datas`` is a mapping that must contain a ``'resume'``
        mapping and may contain ``'work_experiences'`` and ``'educations'``
        sequences of mappings.  Each known field is passed, as ``str``,
        through the matching ``Keywords`` method (presumably a code-to-label
        lookup -- ``Keywords`` is defined elsewhere, confirm there).  Falsy
        field values become ``None``.  The payload is modified in place and
        also returned.

        Raises:
            AssertionError: if ``response_datas`` is empty/falsy.
            Exception: if the payload carries no ``'resume'`` entry.
        """
        resume = response_datas
        yfkeywords = Keywords()

        # Explicit raise instead of ``assert`` so the check survives ``-O``;
        # the exception type stays the same for existing callers.
        if not resume:
            raise AssertionError('empty resume payload')
        if 'resume' not in resume or resume['resume'] is None:
            raise Exception('No Resume Return! Maybe Over 300!')

        profile = resume['resume']

        def translate(convert, raw):
            # Missing / empty values are normalised to None, not converted.
            return convert(str(raw)) if raw else None

        def translate_multi(convert, raw):
            # Comma-separated values are converted piece by piece and
            # re-joined; anything else is treated as a single value.
            text = str(raw)
            if ',' in text:
                return ','.join([convert(part) for part in text.split(',')])
            return translate(convert, raw)

        # Stage the converted values first so that ``profile`` is left
        # untouched if any conversion raises.
        translated = {}
        for key, convert in (
                ('sex', yfkeywords.Sex),
                ('jobState', yfkeywords.JobState),
                ('maritalStatus', yfkeywords.MaritalStatus),
                ('expectWorkType', yfkeywords.Worktype),
                ('education', yfkeywords.Education),
        ):
            translated[key] = translate(convert, profile.get(key))

        for key in ('expectCity', 'city', 'province',
                    'hukouProvince', 'hukouCity'):
            translated[key] = translate_multi(yfkeywords.Expectcity,
                                              profile.get(key))

        translated['expectSalary'] = translate(yfkeywords.Expectsalary,
                                               profile.get('expectSalary'))
        translated['jobTitle'] = translate_multi(yfkeywords.Jobtitle,
                                                 profile.get('jobTitle'))

        # ``update`` works on both Python 2 and 3 (``iteritems`` does not).
        profile.update(translated)

        item_converters = (
            ('salary', yfkeywords.Expectsalary),
            ('compSize', yfkeywords.CompSize),
            ('compIndustry', yfkeywords.Industry),
            ('compProperty', yfkeywords.CompProperty),
            ('education', yfkeywords.Education),
        )
        for section in ('work_experiences', 'educations'):
            entries = resume.get(section)
            if entries is None:
                # Section absent (or null): nothing to convert.
                continue
            converted = []
            for entry in entries:
                for key, convert in item_converters:
                    # Only keys already present on the entry are rewritten.
                    if key in entry:
                        entry[key] = translate(convert, entry.get(key))
                converted.append(entry)
            resume[section] = converted
        return resume
예제 #2
0
파일: spider.py 프로젝트: mylove1/spiders
    def download_resume(self, id, headers):
        """Fetch one resume, convert its coded fields, return it as JSON text.

        POSTs ``id`` as ``userId`` to the ``getUserResume`` endpoint through
        ``self.session`` using ``self.proxies``.  Each attempt is preceded by
        a random 3-10 second pause; a failed attempt (exception, falsy
        response or non-200 status) is logged and retried after 30 seconds,
        up to six attempts in total.  The decoded payload then has its known
        fields passed, as ``str``, through the matching ``Keywords`` method
        (presumably a code-to-label lookup -- ``Keywords`` is defined
        elsewhere, confirm there); falsy field values become ``None``.

        Args:
            id: user id sent as the ``userId`` form field.
            headers: HTTP headers passed to the request.

        Returns:
            The converted payload serialised with
            ``json.dumps(..., ensure_ascii=False)``.

        Raises:
            Exception: ``"PROXY_FAIL!"`` once all attempts have failed, or
                ``'No Resume Return! Maybe Over 300!'`` if the payload has
                no ``'resume'`` entry.
            AssertionError: if the decoded payload is empty/falsy.
        """
        # Lazy logger arguments: same message text, formatted only if emitted.
        logger.info('headers %s of download resume', headers)
        try_times = 0
        url = "http://www.yifengjianli.com/bidme/getUserResume"
        yfkeywords = Keywords()
        while True:
            try_times += 1
            try:
                time.sleep(random.uniform(3, 10))
                response = self.session.post(url, data={
                    "userId": id,
                    "resumeCookie": "",
                }, headers=headers, timeout=30, proxies=self.proxies)
                # Explicit raise instead of ``assert`` so a bad response
                # still triggers a retry when Python runs with ``-O``.
                if not response or response.status_code != 200:
                    raise AssertionError(
                        'unexpected response: %r' % (response,))
                response.encoding = 'utf-8'
            except Exception:
                logger.warning('fetch url %s with %s fail:\n%s',
                               url, self.proxies, traceback.format_exc())
                if try_times > 5:
                    raise Exception("PROXY_FAIL!")
                time.sleep(30)
            else:
                break

        resume = json.loads(response.text)
        if not resume:
            raise AssertionError('empty resume payload')
        if 'resume' not in resume or resume['resume'] is None:
            raise Exception('No Resume Return! Maybe Over 300!')

        profile = resume['resume']

        def translate(convert, raw):
            # Missing / empty values are normalised to None, not converted.
            return convert(str(raw)) if raw else None

        def translate_multi(convert, raw):
            # Comma-separated values are converted piece by piece and
            # re-joined; anything else is treated as a single value.
            text = str(raw)
            if ',' in text:
                return ','.join([convert(part) for part in text.split(',')])
            return translate(convert, raw)

        # Stage the converted values first so that ``profile`` is left
        # untouched if any conversion raises.
        translated = {}
        for key, convert in (
                ('sex', yfkeywords.Sex),
                ('jobState', yfkeywords.JobState),
                ('maritalStatus', yfkeywords.MaritalStatus),
                ('expectWorkType', yfkeywords.Worktype),
                ('education', yfkeywords.Education),
        ):
            translated[key] = translate(convert, profile.get(key))

        for key in ('expectCity', 'city', 'province',
                    'hukouProvince', 'hukouCity'):
            translated[key] = translate_multi(yfkeywords.Expectcity,
                                              profile.get(key))

        translated['expectSalary'] = translate(yfkeywords.Expectsalary,
                                               profile.get('expectSalary'))
        translated['jobTitle'] = translate_multi(yfkeywords.Jobtitle,
                                                 profile.get('jobTitle'))

        # ``update`` works on both Python 2 and 3 (``iteritems`` does not).
        profile.update(translated)

        item_converters = (
            ('salary', yfkeywords.Expectsalary),
            ('compSize', yfkeywords.CompSize),
            ('compIndustry', yfkeywords.Industry),
            ('compProperty', yfkeywords.CompProperty),
            ('education', yfkeywords.Education),
        )
        for section in ('work_experiences', 'educations'):
            entries = resume.get(section)
            if entries is None:
                # Section absent (or JSON null): nothing to convert.
                continue
            converted = []
            for entry in entries:
                for key, convert in item_converters:
                    # Only keys already present on the entry are rewritten.
                    if key in entry:
                        entry[key] = translate(convert, entry.get(key))
                converted.append(entry)
            resume[section] = converted

        return json.dumps(resume, ensure_ascii=False)