class Cdata(GQDataHelper):
    """Constant tables used to build resume-search requests.

    Everything here is a class attribute: option lists (education level,
    sex, job-seeking status, age, work years, yearly salary), two request
    templates whose fields start out empty apart from a few fixed flags,
    the HTTP headers, and the login accounts.
    """

    # [code, label] pairs.  The statement that would extend this list with
    # codes "050" through "090" is disabled in this variant.
    edulevel = [
        ["005", "博士后"],
        ["010", "博士"],
        ["020", "MBA/EMBA"],
        ["030", "硕士"],
        ["040", "本科"],
    ]

    sex = GQDataHelper.qlist('男', '女')

    # [code, label] pairs for the candidate's job-seeking status.
    userStatus = [
        [0, "在职,看看新机会"],
        [1, "离职,正在找工作"],
        [2, "在职,急寻新工作"],
        [3, "在职,暂无跳槽打算"],
    ]

    # Age buckets written as 'low-high' strings, one per single-item list.
    agedata = [
        ['0-25'],
        ['26-30'],
        ['31-32'],
        ['33-35'],
        ['36-40'],
        ['41-45'],
        ['46-55'],
        ['56-999'],
    ]

    # A finer split ('0-2', '3-5', '6-8', '9-999') is disabled in favour of
    # the two buckets below.
    workyear = GQDataHelper.qlist('5-10', '11-999')
    yearSalary = GQDataHelper.qlist('30-80', '81-999')

    # Request template; only company_name_scope and search_level are
    # pre-filled.
    cst_template = {
        "keys": "",
        "keysRelation": "",
        "company_name": "",
        "company_name_scope": "0",
        "industrys": "",
        "jobtitles": "",
        "dqs": "",
        "wantdqs": "",
        "edulevellow": "",
        "edulevelhigh": "",
        "edulevel_tz": "",
        "school_kind": "",
        "agelow": "",
        "agehigh": "",
        "workyearslow": "",
        "workyearshigh": "",
        "yearSalarylow": "",
        "yearSalaryhigh": "",
        "sex": "",
        "userStatus": "",
        "search_level": "2",
    }

    # Second request template.  "cstContent" is a dict literal created once
    # at class-definition time, so it is shared by every user of pd_template
    # that does not copy it first.
    pd_template = {
        "agehigh": "",
        "search_level": "1",
        "jobtitles": "",
        "company_name": "",
        "keys": "",
        "workyearslow": "",
        "search_scope": "",
        "dqs": "",
        "conditionCount": "",
        "company_name_scope": "0",
        "wantdqs": "",
        "edulevellow": "",
        "industrys": "",
        "cs_createtime": "",
        "sex": "",
        "edulevelhigh": "",
        "agelow": "",
        "workyearshigh": "",
        "so_translate_flag": "1",
        "cstContent": {},
        "userStatus": "",
        "cs_id": "",
        "yearSalarylow": "",
        "yearSalaryhigh": "",
    }

    headers = {
        'DNT': 1,
        'Referer': 'http://lpt.liepin.com/resume/soResumeNew/',
    }

    # NOTE(review): credentials are hard-coded in source; consider loading
    # them from configuration instead.
    accounts = [{'u': 'hr20065124', 'p': 'zhaopin123'}]

    # Disabled: tempfile = spider.racer.TempFileNames()
    # Two alternative lt_accounts entries are disabled as well.
    lt_accounts = [{'u': '*****@*****.**', 'p': 'zhaopin123'}]
def init_conditions(self):
    """Register this helper's query dimensions via ``GQDataHelper.add``.

    The dimensions are added in a fixed order: 'nc', 'nc2', 'dqs',
    'compscale'.
    """
    # NOTE(review): 'nc2' is registered with the same ``nc`` value as 'nc';
    # confirm that this is intended.
    dimensions = (
        ('nc', nc),
        ('nc2', nc),
        ('dqs', dqs),
        ('compscale', compscale),
    )
    for key, values in dimensions:
        GQDataHelper.add(self, key, values)
def init_conditions(self):
    """Register the query dimensions and create the failed-URL saver.

    Each dimension is passed to ``GQDataHelper.add`` in the order listed
    below; afterwards ``self.bs2`` is set to a ``FileSaver`` constructed
    with "failed_urls.txt".
    """
    dimensions = (
        ('companyType', companyType),
        ('degreeType', degreeType),
        ('experienceType', experienceType),
        ('industry', industry),
        ('jobType', jobType),
        ('payType', payType),
        ('cityKw', cityKw),
    )
    for key, values in dimensions:
        GQDataHelper.add(self, key, values)

    self.bs2 = FileSaver("failed_urls.txt")
#!/usr/bin/env python
# -*- coding:utf8 -*-

import os
import sys
import spider
from spider.savebin import FileSaver
from spider.genquery import GenQueries, GQDataHelper
import re
import copy

# Source website
domain = GQDataHelper.qlist(
    "百城招聘",
    "英才网联",
    "厦门人才网",
    "卓博人才网",
    "智通人才网",
    "拉勾网",
    "猎聘网",
    "中国医疗人才网",
    "36人才",
    "云南招聘网",
    "中国人才热线",
    "河南九博人才网",
    "中国汽车人才网",
    "海峡人才网",
    "江西人才人事网",
    "百才招聘",
    "中国美容人才热线",
    "河北搜才网",
)

# Company type
companyType = GQDataHelper.qlist(
    "其他性质",
    "民营企业",
    "外资",
    "合资",
    "国企",
    "事业单位",
    "国家机关",
)

# Education requirement
degreeType = GQDataHelper.qlist(
    "初中",
    "高中",
    "中技",
    "中专",
    "大专",
    "本科",
    "硕士",
    "博士",
    "不限学历",
)

# Work experience
experienceType = GQDataHelper.qlist(
    "应届毕业生",
    "0-2年",
    "3-5年",
    "6-7年",
    "8-10年",
    "不限经验",
)

# Industry type
industry = GQDataHelper.qlist(
    "快速消费品(食品、饮料、化妆品)",
    "贸易/进出口",
    "互联网/电子商务",
    "电子技术/半导体/集成电路",
    "专业服务(咨询、人力资源、财会)",
    "计算机软件",
    "新能源",
    "批发/零售",
    "服装/纺织/皮革",
    "汽车及零配件",
    "金融/投资/证券",
    "外包服务",
    "制药/生物工程",
    "房地产",
    "交通/运输/物流",
    "教育/培训/院校",
    "机械/设备/重工",
    "家具/家电/玩具/礼品",
    "中介服务",
    "通信/电信/网络设备",
)
def init_conditions(self):
    """Register the query dimensions via ``GQDataHelper.add``, in order.

    The trailing figures on each entry are the author's annotations.  Where
    two numbers appear, the second equals the first divided by the ALL
    total below; what exactly is being counted is not stated here.
    """
    # ALL=3277369
    dimensions = (
        ('companyType', companyType),        # 0 0
        ('degreeType', degreeType),          # 13 0.000003966596376544722306215748059
        ('jobType', jobType),                # 2329 0.0007106309969978967885520367099
        ('domain', domain),                  # 0.01408784246034669669827704161
        ('experienceType', experienceType),  # 13031 0.003976055183288790490176724073
        ('payType', payType),                # 20364 0.006213520662458209618752114882
    )
    for key, values in dimensions:
        GQDataHelper.add(self, key, values)
class Cdata(GQDataHelper):
    """Constant tables and shared helpers for building resume-search requests.

    Everything here is a class attribute: option lists (education level,
    sex, job-seeking status, age, work years), two request templates whose
    fields start out empty apart from a few fixed flags, the HTTP headers,
    the login accounts, a temp-file name helper and the CV store.
    """

    # [code, label] pairs for education level.
    edulevel = [
        ["005", "博士后"],
        ["010", "博士"],
        ["020", "MBA/EMBA"],
        ["030", "硕士"],
        ["040", "本科"],
        ["050", "大专"],
        ["060", "中专"],
        ["070", "中技"],
        ["080", "高中"],
        ["090", "初中"],
    ]

    sex = GQDataHelper.qlist('男', '女')

    # [code, label] pairs for the candidate's job-seeking status.
    userStatus = [
        [0, "在职,看看新机会"],
        [1, "离职,正在找工作"],
        [2, "在职,急寻新工作"],
        [3, "在职,暂无跳槽打算"],
    ]

    # Age buckets written as 'low-high' strings, one per single-item list.
    agedata = [
        ['0-25'],
        ['26-30'],
        ['31-35'],
        ['36-40'],
        ['41-45'],
        ['46-55'],
        ['56-999'],
    ]

    # NOTE(review): '3-5' and '5-8' both contain 5, so these buckets overlap;
    # confirm whether '6-8' was intended.  Left unchanged here.
    workyear = GQDataHelper.qlist('0-2', '3-5', '5-8', '9-999')

    # Request template; only company_name_scope and search_level are
    # pre-filled.
    cst_template = {
        "keys": "",
        "keysRelation": "",
        "company_name": "",
        "company_name_scope": "0",
        "industrys": "",
        "jobtitles": "",
        "dqs": "",
        "wantdqs": "",
        "edulevellow": "",
        "edulevelhigh": "",
        "edulevel_tz": "",
        "school_kind": "",
        "agelow": "",
        "agehigh": "",
        "workyearslow": "",
        "workyearshigh": "",
        "sex": "",
        "userStatus": "",
        "search_level": "2",
    }

    # Second request template.  "cstContent" is a dict literal created once
    # at class-definition time, so it is shared by every user of pd_template
    # that does not copy it first.
    pd_template = {
        "agehigh": "",
        "search_level": "2",
        "jobtitles": "",
        "company_name": "",
        "keys": "",
        "workyearslow": "",
        "search_scope": "",
        "dqs": "",
        "conditionCount": "",
        "company_name_scope": "0",
        "wantdqs": "",
        "edulevellow": "",
        "industrys": "",
        "cs_createtime": "",
        "sex": "",
        "edulevelhigh": "",
        "agelow": "",
        "workyearshigh": "",
        "so_translate_flag": "1",
        "cstContent": {},
        "userStatus": "",
        "cs_id": "",
    }

    headers = {
        'DNT': 1,
        'Referer': 'http://lpt.liepin.com/resume/soResumeNew/',
    }

    # NOTE(review): credentials are hard-coded in source; consider loading
    # them from configuration instead.
    accounts = [{'u': '深圳市晶伯川科技有限公司', 'p': 'zhaopin123'}]

    tempfile = spider.racer.TempFileNames()

    # Fall back to the fake store when the real one cannot be created.
    # Catch Exception rather than using a bare ``except`` so that
    # KeyboardInterrupt and SystemExit still propagate instead of silently
    # selecting the fake store.
    try:
        lpcvstore = LPCVStore()
    except Exception:
        # Parenthesised form prints the same text on Python 2 and also
        # parses on Python 3.
        print('===== using fake store ======')
        lpcvstore = FakeCVStore()
def init_conditions(self):
    """Register the query dimensions and create the failed-URL saver.

    Each dimension is passed to ``GQDataHelper.add`` in the order listed
    below; afterwards ``self.bs2`` is set to a ``FileSaver`` constructed
    with "failed_urls.txt".
    """
    dimensions = (
        ('jl', jl),
        ('cs', cs),
        ('ct', ct),
        ('el', el),
        # 'we' is not registered: per the author's note it cannot be
        # partitioned orthogonally.
        ('in', in_),
        ('salary', salary),
        ('bj', bj),
        ('et', et),
    )
    for key, values in dimensions:
        GQDataHelper.add(self, key, values)

    self.bs2 = FileSaver("failed_urls.txt")