def dealGetBar(company, position, welfare):
        """Fetch the rows ``Util.getBar`` returns for ``company``, sorted descending.

        ``position`` and ``welfare`` are accepted but not used by this function.

        Returns:
            The object returned by ``Util.getBar``, sorted in place by the
            second element of each row (``row[1]``), largest first.
        """
        session = Util.getDBSession("root", "chuanzhi", "lagou")()
        try:
            data = Util.getBar(company, session)
        finally:
            # Release the session even when the query raises.
            session.close()
        data.sort(key=lambda x: x[1], reverse=True)
        return data
    def deal(company: str, position: str, warfare: str):
        """Query ``company`` via ``Util.threeParm`` and post-process the result.

        The raw rows are passed through ``ServiceUtil.dealData`` (together with
        ``position`` and ``warfare``) and then ``ServiceUtil.dealThreeParm``.

        Returns:
            Whatever ``ServiceUtil.dealThreeParm`` returns.
        """
        session = Util.getDBSession("root", "chuanzhi", "lagou")()
        try:
            db_data = Util.threeParm(company, session)
            data = ServiceUtil.dealData(db_data, position, warfare)
            # Processing stays inside the try so the session is still open
            # while the rows are consumed, and is closed on every exit path.
            return ServiceUtil.dealThreeParm(data)
        finally:
            session.close()
 def getCityData(company, position, welfare, city):
     """Return one randomly chosen page of rows for ``company`` in ``city``.

     The page number is drawn uniformly from ``1..ceil(count / 20)`` where
     ``count`` comes from ``Util.getCompanyNum``; each row of that page is
     mapped through ``ServiceUtil.dealCityData``.

     ``position`` and ``welfare`` are accepted but not used by this function.

     Returns:
         A list of processed rows, or an empty list when the count is zero
         (there is no page to pick from).
     """
     page_size = 20
     session = Util.getDBSession("root", "chuanzhi", "lagou")()
     try:
         total = Util.getCompanyNum(company, city, session)
         page_count = math.ceil(total / page_size)
         if page_count < 1:
             # random.randint(1, 0) would raise ValueError on an empty range.
             return []
         page = random.randint(1, page_count)
         rows = Util.getCompanyCityData(company, city, page, page_size,
                                        session)
         return list(map(ServiceUtil.dealCityData, rows))
     finally:
         # Closed on every exit path, including query failures.
         session.close()
Beispiel #4
0
 def testLogin(username, password):
     """Check ``username``/``password`` against the ``bookstore`` database.

     Looks the user up with ``Util.queryUserByUsername`` and compares
     ``password`` with the string form of the record's ``accountId``.

     Returns:
         ``True`` when a record exists and ``str(account.accountId)`` equals
         ``password``; ``False`` otherwise.
     """
     # NOTE(review): the credential is compared against ``accountId`` rather
     # than a dedicated password field - confirm this is intended.
     session = Util.getDBSession("root", "chuanzhi", "bookstore")()
     try:
         account = Util.queryUserByUsername(username, session)
         if not account:
             return False
         # Read the attribute while the session is still open.
         return str(account.accountId) == password
     finally:
         session.close()
Beispiel #5
0
def _job_from_record(j):
    """Build a ``Jobs`` object from one entry of a job-listing result page."""
    return Jobs(company_id=j['companyId'],
                position_id=j['positionId'],
                job_nature=j['jobNature'],
                finance_stage=j['financeStage'],
                company_name=j['companyName'],
                company_full_name=j['companyFullName'],
                company_size=j['companySize'],
                industry_field=j['industryField'],
                position_name=j['positionName'],
                city=j['city'],
                create_time=datetime.now(),
                salary=j['salary'],
                work_year=j['workYear'],
                education=j['education'],
                position_advantage=j['positionAdvantage'],
                company_label_list=",".join(j['companyLabelList']),
                user_id=j['userId'],
                company_logo=j['companyLogo'],
                district=j['district'])


def getJobObject(company_id):
    """Load the saved job pages of ``company_id`` and store them in the database.

    Every file under ``../spider/data/jobs/<company_id>/`` is parsed as JSON.
    For files that have a truthy ``"content"`` entry, each record in
    ``content.data.page.result`` whose ``district``, ``industryField``,
    ``companyLogo`` and ``positionAdvantage`` values are all truthy is turned
    into a ``Jobs`` object. The objects of one file are handed to
    ``Util.addJobsToDataBase`` as a batch.

    Returns:
        The batch built from the last file processed (an empty list when the
        directory holds no files), or ``None`` if a file could not be read or
        parsed - in that case processing stops at that file.
    """
    required = ("district", "industryField", "companyLogo",
                "positionAdvantage")
    path = "../spider/data/jobs/{}/".format(company_id)
    # Bound up front so an empty directory returns [] instead of failing.
    li = []
    session = Util.getDBSession("root", "chuanzhi", "lagou")()
    try:
        for i in os.listdir(path):
            with open(os.path.join(path, i), encoding="utf-8") as fp:
                try:
                    data = json.load(fp)
                except (OSError, ValueError) as e:
                    # ValueError covers both invalid JSON and decode errors.
                    print(e)
                    return None
            li = []
            if data.get("content", False):
                for j in data["content"]["data"]["page"]["result"]:
                    if all(j.get(key, False) for key in required):
                        li.append(_job_from_record(j))
            Util.addJobsToDataBase(session, li)

            print(i)
            print("--" * 50)
        return li
    finally:
        # The session was previously never closed by this function.
        session.close()
Beispiel #6
0
def test_company(company: Company, li, session):
    """Tell whether ``company`` still needs to be queued for insertion.

    Returns ``False`` if an element of ``li`` already carries the same
    ``company_id``, or if ``Util.getCompanyId`` reports a non-zero ``count()``
    for that id; returns ``True`` otherwise.
    """
    target_id = company.company_id

    # Pending batch first: no database round trip for an in-memory duplicate.
    if any(target_id == queued.company_id for queued in li):
        return False

    return Util.getCompanyId(target_id, session=session).count() == 0
Beispiel #7
0
def createDir(path):
    """Create directory ``path`` if nothing exists at that location yet.

    Missing parent directories are created as well. When ``path`` already
    exists (as a directory or anything else) the call does nothing.
    """
    if not os.path.exists(path):
        # exist_ok guards against another process creating the directory
        # between the check above and this call.
        os.makedirs(path, exist_ok=True)


def save(fp, n, company_id):
    """Write ``fp.text`` to ``data/jobs/<company_id>/<n>.txt`` as UTF-8.

    The target directory must already exist; an existing file is overwritten.
    """
    target = "data/jobs/{}/{}.txt".format(company_id, n)
    with open(target, "w", encoding="utf-8") as out:
        out.write(fp.text)


if __name__ == '__main__':

    # Load the de-duplicated result of Util.selectAllCompany, releasing the
    # session even if the query fails.
    session = Util.getDBSession("root", "chuanzhi", "lagou")()
    try:
        company_list = list(set(Util.selectAllCompany(session)))
    finally:
        session.close()

    length = len(company_list)

    # Adjacent literals instead of a backslash continuation: the continuation
    # embedded the next line's indentation into the user-agent value.
    headers = {
        "user-agent":
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36",
        "host": "www.lagou.com",
        "referer": "https://www.lagou.com/gongsi/62.html"
    }

    temp = 0
Beispiel #8
0
import os
import time

from Dao.query import Util

if __name__ == '__main__':
    # Manual smoke check: report when Util.getCompanyId yields no rows for
    # company id 62.
    session = Util.getDBSession("root", "chuanzhi", "lagou")()
    matches = Util.getCompanyId(62, session=session).count()
    if matches == 0:
        print("wujieguo")
    session.close()

Beispiel #9
0
def _company_from_record(j):
    """Build a ``Company`` from one entry of a company-list result page.

    The four optional attributes are only set when all of their source keys
    are present with a value other than ``0``; otherwise none of them is set.
    """
    optional = (("other_label", "otherLabel"),
                ("company_features", "companyFeatures"),
                ("company_logo", "companyLogo"),
                ("industry_field", "industryField"))
    fields = dict(
        company_id=j['companyId'],
        company_full_name=j['companyFullName'],
        company_short_name=j['companyShortName'],
        city=j['city'],
        finance_stage=j['financeStage'],
        company_size=j['companySize'],
        position_num=j['positionNum'],
        interview_remark_num=j['interviewRemarkNum'],
        update_time=j['updateTime'],
        process_rate=j['processRate'],
        approve=j['approve'],
        company_combine_score=j['companyCombineScore'],
        is_has_valid_position=j['isHasValidPosition'],
        match_score=j['matchScore'])
    if all(j.get(key, 0) != 0 for _, key in optional):
        fields.update((attr, j[key]) for attr, key in optional)
    return Company(**fields)


def getCompanyObject():
    """Import the saved company-list pages into the database.

    Each sub-directory of ``../spider/data/companyPage/`` is expected to hold
    files named ``1.txt`` .. ``N.txt`` (``N`` = number of entries in the
    directory). Every record under the ``"result"`` key of each file is
    converted to a ``Company``; records accepted by ``test_company`` are
    collected and passed to ``Util.addJobsToDataBase`` once per sub-directory.

    Records that fail to convert or to check are reported with ``print`` and
    skipped.
    """
    session = Util.getDBSession("root", "chuanzhi", "lagou")()
    try:
        for jk in os.listdir("../spider/data/companyPage/"):
            li = list()
            page_count = len(
                os.listdir("../spider/data/companyPage/{}/".format(jk)))
            for i in range(1, page_count + 1):
                with open("../spider/data/companyPage/{}/{}.txt".format(
                        jk, i), encoding="utf-8") as fp:
                    data = json.load(fp)
                for j in data["result"]:
                    try:
                        company = _company_from_record(j)
                        if test_company(company, li, session):
                            li.append(company)
                    except Exception as e:
                        # Best-effort import: one bad record must not abort
                        # the rest of the page.
                        print(e)
            Util.addJobsToDataBase(session, li)
    finally:
        # Previously skipped whenever a file or JSON error escaped the loop.
        session.close()
Beispiel #10
0
import requests
from requests import Response
from Dao.query import Util


def getDetail(job_id: int, show_id) -> Response:
    """Request the Lagou job detail page for ``job_id``.

    Args:
        job_id: Inserted into the ``/jobs/<job_id>.html`` part of the URL.
        show_id: Value of the ``show`` query parameter.

    Returns:
        The ``requests`` response of the GET request.
    """
    url = 'https://www.lagou.com/jobs/{}.html?source=pl&i=pl-0&show={}'.format(
        job_id, show_id)

    # Adjacent literals instead of a backslash continuation, which had split
    # the Chrome version with embedded indentation.
    header = {
        'user-agent':
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36'
    }

    # The header dict was built but never sent; pass it with the request.
    return requests.get(url, headers=header)


if __name__ == '__main__':
    session = Util.getDBSession("root", "chuanzhi", "lagou")()
    try:
        position_list = Util.queryJobPosition(session)
        # Only the first row is requested; the loop exits right after it.
        for i in position_list:
            # NOTE(review): getDetail is declared with a required ``show_id``
            # parameter, so this one-argument call raises TypeError - confirm
            # which value should be passed.
            getDetail(i[0])
            break
    finally:
        # Closed on every path, including an empty position_list.
        session.close()