def dealGetBar(company, position, welfare):
    """Return the rows from ``Util.getBar`` for ``company``, largest first.

    Args:
        company: Forwarded to ``Util.getBar``.
        position: Not used by this function.
        welfare: Not used by this function.

    Returns:
        The list produced by ``Util.getBar``, sorted in place in descending
        order of each row's second element (``row[1]``).
    """
    session = Util.getDBSession("root", "chuanzhi", "lagou")()
    try:
        data = Util.getBar(company, session)
    finally:
        # Release the session even when the query raises.
        session.close()
    data.sort(key=lambda row: row[1], reverse=True)
    return data
def deal(company: str, position: str, warfare: str):
    """Query the data for ``company`` and run it through the service helpers.

    Args:
        company: Forwarded to ``Util.threeParm``.
        position: Forwarded to ``ServiceUtil.dealData``.
        warfare: Forwarded to ``ServiceUtil.dealData``.

    Returns:
        Whatever ``ServiceUtil.dealThreeParm`` returns for the processed data.
    """
    session = Util.getDBSession("root", "chuanzhi", "lagou")()
    try:
        db_data = Util.threeParm(company, session)
        data = ServiceUtil.dealData(db_data, position, warfare)
        # The session stays open until processing is finished, as before.
        return ServiceUtil.dealThreeParm(data)
    finally:
        # Release the session even when the query or processing raises.
        session.close()
def getCityData(company, position, welfare, city):
    """Return one randomly chosen page of company data for ``city``.

    The page number is drawn uniformly from ``1 .. ceil(count / 20)`` where
    ``count`` comes from ``Util.getCompanyNum``; each row of that page is
    passed through ``ServiceUtil.dealCityData``.

    Args:
        company: Forwarded to the ``Util`` queries.
        position: Not used by this function.
        welfare: Not used by this function.
        city: Forwarded to the ``Util`` queries.

    Returns:
        A list of processed rows; an empty list when there is no page to
        pick (the count is zero).
    """
    page_size = 20
    session = Util.getDBSession("root", "chuanzhi", "lagou")()
    try:
        total = Util.getCompanyNum(company, city, session)
        page_count = math.ceil(total / page_size)
        if page_count < 1:
            # random.randint(1, 0) would raise ValueError on an empty result.
            return []
        page = random.randint(1, page_count)
        rows = Util.getCompanyCityData(company, city, page, page_size,
                                       session)
        return list(map(ServiceUtil.dealCityData, rows))
    finally:
        # Release the session even when a query or the mapping raises.
        session.close()
def testLogin(username, password):
    """Check a username/password pair against the stored account.

    Args:
        username: Forwarded to ``Util.queryUserByUsername``.
        password: Compared with ``str(account.accountId)``.

    Returns:
        ``True`` when an account is found and the string form of its
        ``accountId`` equals ``password``; ``False`` otherwise.
    """
    # NOTE(review): this connects to the "bookstore" database and treats the
    # account id as the password -- confirm both are intended.
    session = Util.getDBSession("root", "chuanzhi", "bookstore")()
    try:
        account = Util.queryUserByUsername(username, session)
    finally:
        # Release the session even when the lookup raises.
        session.close()
    if not account:
        return False
    return str(account.accountId) == password
def getJobObject(company_id):
    """Load the saved job pages of one company and store them as ``Jobs``.

    Every file in ``../spider/data/jobs/<company_id>/`` is parsed as JSON.
    Entries under ``content.data.page.result`` that have truthy
    ``district``, ``industryField``, ``companyLogo`` and
    ``positionAdvantage`` values become ``Jobs`` objects, and each file's
    objects are handed to ``Util.addJobsToDataBase``.

    Args:
        company_id: Name of the sub-directory to read.

    Returns:
        The ``Jobs`` list built from the last file processed (an empty list
        when the directory has no files), or ``None`` when a file cannot be
        parsed -- processing stops at that file. The session is closed
        before returning, so the objects are no longer attached to it.
    """
    required_keys = ("district", "industryField", "companyLogo",
                     "positionAdvantage")
    session = Util.getDBSession("root", "chuanzhi", "lagou")()
    path = "../spider/data/jobs/{}/".format(company_id)
    # Bound up front so an empty directory returns [] instead of raising
    # UnboundLocalError.
    li = []
    try:
        for i in os.listdir(path):
            with open(path + i, encoding="utf-8") as fp:
                try:
                    data = json.load(fp)
                except Exception as e:
                    # Best-effort import: report the bad file and stop.
                    print(e)
                    return None
            # A fresh list per file: only this file's jobs are inserted.
            li = []
            if data.get("content", False):
                for j in data["content"]["data"]["page"]["result"]:
                    if not all(j.get(key, False) for key in required_keys):
                        continue
                    li.append(
                        Jobs(company_id=j['companyId'],
                             position_id=j['positionId'],
                             job_nature=j['jobNature'],
                             finance_stage=j['financeStage'],
                             company_name=j['companyName'],
                             company_full_name=j['companyFullName'],
                             company_size=j['companySize'],
                             industry_field=j['industryField'],
                             position_name=j['positionName'],
                             city=j['city'],
                             create_time=datetime.now(),
                             salary=j['salary'],
                             work_year=j['workYear'],
                             education=j['education'],
                             position_advantage=j['positionAdvantage'],
                             company_label_list=",".join(
                                 j['companyLabelList']),
                             user_id=j['userId'],
                             company_logo=j['companyLogo'],
                             district=j['district']))
            Util.addJobsToDataBase(session, li)
            print(i)
            print("--" * 50)
        return li
    finally:
        # The original never closed the session on any path.
        session.close()
def test_company(company: Company, li, session):
    """Tell whether ``company`` is new to both the batch and the database.

    Args:
        company: Candidate whose ``company_id`` is checked.
        li: Objects already collected in the current batch; each exposes
            ``company_id``.
        session: Database session forwarded to ``Util.getCompanyId``.

    Returns:
        ``False`` when an entry of ``li`` has the same ``company_id`` or when
        the ``Util.getCompanyId`` lookup counts at least one match; ``True``
        otherwise.
    """
    target_id = company.company_id
    if any(target_id == queued.company_id for queued in li):
        return False
    # Only hit the database when the in-memory batch had no duplicate.
    stored = Util.getCompanyId(company.company_id, session=session).count()
    return stored == 0
def createDir(path):
    """Create directory ``path``, including missing parents, if absent.

    Nothing happens when something already exists at ``path``.
    """
    if not os.path.exists(path):
        # exist_ok tolerates the directory appearing between the check and
        # the call; makedirs also creates missing parent directories.
        os.makedirs(path, exist_ok=True)


def save(fp, n, company_id):
    """Write ``fp.text`` to ``data/jobs/<company_id>/<n>.txt`` as UTF-8.

    Args:
        fp: Object exposing a ``text`` attribute (presumably an HTTP
            response -- confirm against the caller).
        n: Used as the file name stem.
        company_id: Used as the directory name; the directory must exist.
    """
    target = "data/jobs/{}/{}.txt".format(company_id, n)
    with open(target, "w", encoding="utf-8") as f:
        f.write(fp.text)


if __name__ == '__main__':
    session = Util.getDBSession("root", "chuanzhi", "lagou")()
    try:
        # De-duplicate the companies returned by the query.
        company_list = list(set(Util.selectAllCompany(session)))
    finally:
        # Release the session even when the query raises.
        session.close()
    length = len(company_list)
    headers = {
        # Adjacent literals replace the backslash continuation that used to
        # sit inside the string and corrupt the user-agent value.
        "user-agent": ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                       "AppleWebKit/537.36 (KHTML, like Gecko) "
                       "Chrome/86.0.4240.75 Safari/537.36"),
        "host": "www.lagou.com",
        "referer": "https://www.lagou.com/gongsi/62.html"
    }
    # Not read anywhere in the visible part of this script.
    temp = 0
import os
import time

from Dao.query import Util

if __name__ == '__main__':
    # Manual check: print a marker when the lookup for company id 62 counts
    # zero matches.
    session = Util.getDBSession("root", "chuanzhi", "lagou")()
    try:
        if Util.getCompanyId(62, session=session).count() == 0:
            print("wujieguo")  # pinyin, roughly "no result"
    finally:
        # Release the session even when the lookup raises.
        session.close()
def _buildCompany(j):
    """Build a ``Company`` from one entry of a page's ``"result"`` list.

    The logo, industry field, features and other-label values are only
    passed on when all four keys are present with a value other than ``0``.
    """
    fields = dict(
        company_id=j['companyId'],
        company_full_name=j['companyFullName'],
        company_short_name=j['companyShortName'],
        city=j['city'],
        position_num=j['positionNum'],
        finance_stage=j['financeStage'],
        company_size=j['companySize'],
        interview_remark_num=j['interviewRemarkNum'],
        update_time=j['updateTime'],
        process_rate=j['processRate'],
        approve=j['approve'],
        company_combine_score=j['companyCombineScore'],
        is_has_valid_position=j['isHasValidPosition'],
        match_score=j['matchScore'],
    )
    optional = {
        "other_label": "otherLabel",
        "company_features": "companyFeatures",
        "company_logo": "companyLogo",
        "industry_field": "industryField",
    }
    if all(j.get(key, 0) != 0 for key in optional.values()):
        fields.update((attr, j[key]) for attr, key in optional.items())
    return Company(**fields)


def getCompanyObject():
    """Import the saved company pages into the database.

    For each sub-directory of ``../spider/data/companyPage/`` the files
    ``1.txt`` .. ``N.txt`` (``N`` being the number of directory entries) are
    parsed as JSON. Every entry of their ``"result"`` list is turned into a
    ``Company``; those accepted by ``test_company`` are collected and handed
    to ``Util.addJobsToDataBase`` once per sub-directory.

    An entry that raises while being built or checked is reported with
    ``print`` and skipped. A file that cannot be opened or parsed aborts the
    import by propagating the exception.
    """
    session = Util.getDBSession("root", "chuanzhi", "lagou")()
    try:
        for jk in os.listdir("../spider/data/companyPage/"):
            li = []
            page_count = len(
                os.listdir("../spider/data/companyPage/{}/".format(jk)))
            for i in range(1, page_count + 1):
                with open("../spider/data/companyPage/{}/{}.txt".format(
                        jk, i), encoding="utf-8") as fp:
                    data = json.load(fp)
                for j in data["result"]:
                    try:
                        company = _buildCompany(j)
                        if test_company(company, li, session):
                            li.append(company)
                    except Exception as e:
                        # Best-effort import: report and skip a bad entry.
                        print(e)
            Util.addJobsToDataBase(session, li)
    finally:
        # Release the session even when a file read or JSON parse raises.
        session.close()
import requests
from requests import Response

from Dao.query import Util


def getDetail(job_id: int, show_id=None) -> Response:
    """Fetch the lagou.com detail page of one job.

    Args:
        job_id: Inserted into the ``/jobs/<job_id>.html`` path.
        show_id: Value of the ``show`` query parameter; the parameter is
            left out when this is ``None``.

    Returns:
        The ``requests`` response for the page.
    """
    url = 'https://www.lagou.com/jobs/{}.html?source=pl&i=pl-0'.format(job_id)
    if show_id is not None:
        url += '&show={}'.format(show_id)
    header = {
        # Adjacent literals replace the backslash continuation that used to
        # split the Chrome version number in the middle.
        'user-agent': ('Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                       'AppleWebKit/537.36 (KHTML, like Gecko) '
                       'Chrome/86.0.4240.75 Safari/537.36')
    }
    # The header dict used to be built but never sent with the request.
    response = requests.get(url, headers=header)
    return response


if __name__ == '__main__':
    session = Util.getDBSession("root", "chuanzhi", "lagou")()
    try:
        position_list = Util.queryJobPosition(session)
        for i in position_list:
            getDetail(i[0])
            # Only the first position is fetched, as before.
            break
    finally:
        # Previously the session was closed only after a successful fetch.
        session.close()