Ejemplo n.º 1
0
def main():
    """Crawl position listings for a slice of the stored companies.

    Reads every ``companyId`` from the ``lagou.companys`` collection and, for
    the IDs at list positions 10000-19999, queries the ``searchPosition.json``
    endpoint once per label in ``labels``. For each company one document is
    written to ``lagou.positions``; it maps ``labels.get(label)`` to the value
    returned by ``get_jobs`` and carries the ``companyId``.
    """
    c = connect('lagou', 'companys')
    companyIds = [result.get('companyId') for result in c.find()]

    UA = get_ua()
    url = 'https://www.lagou.com/gongsi/searchPosition.json'

    cursor = connect('lagou', 'positions')

    for companyId in companyIds[10000:20000]:
        company = {}
        for label in labels.keys():
            items = []
            # At most 9 requests per label; the third get_payload argument is
            # presumably the page number and the fourth the page size.
            for i in range(1, 10):
                payload = get_payload(companyId, label, i, 10)
                response = requests.get(url, params=payload, headers=UA).json()
                time.sleep(0.7)  # throttle between requests
                results = get_results(response)
                if not results:
                    break
                items += results

            # Store the collected items whether the loop stopped on an empty
            # page or ran through all 9 pages. Previously a label with 9
            # non-empty pages was silently dropped from the document.
            company[labels.get(label)] = get_jobs(items, label)

        company['companyId'] = companyId
        print("Get ", companyId)
        cursor.insert(company)
Ejemplo n.º 2
0
def main():
    """Copy every job that is not yet categorised into ``lagou.uncate``.

    A job from ``lagou.jobs`` is skipped when its ``ID`` is among the IDs
    returned by ``used()``; otherwise a ``{'ID', 'name'}`` document is
    inserted into ``lagou.uncate``.
    """
    # A set makes each membership test O(1) instead of scanning a list for
    # every job. Assumes the IDs are hashable scalars -- TODO confirm.
    useds = set(used())
    cursor = connect('lagou', 'jobs')
    cl = connect('lagou', 'uncate')
    for result in cursor.find():
        ID = result.get('ID')
        if ID in useds:
            continue
        cl.insert({'ID': ID, 'name': result.get('name')})
Ejemplo n.º 3
0
def used():
    """Return the ``ID`` of every document in ``lagou.categories`` as a list."""
    categorised = connect('lagou', 'categories')
    return [doc.get('ID') for doc in categorised.find()]
Ejemplo n.º 4
0
def main():
    """Assign jobs to categories by keyword match on the job name.

    For each category in ``categories`` the whole ``lagou.jobs`` collection is
    scanned. A job whose name contains at least one of that category's
    keywords is inserted once into ``lagou.categories`` for that category.
    """
    jobs = connect('lagou', 'jobs')
    target = connect('lagou', 'categories')

    for category in categories.keys():
        for job in jobs.find():
            job_name = job.get('name')
            job_id = job.get('ID')
            keywords = categories.get(category)

            # any() stops at the first matching keyword, like the original
            # inner loop's ``break``.
            if any(keyword in job_name for keyword in keywords):
                target.insert({
                    'name': job_name,
                    'ID': job_id,
                    'category': category,
                })
Ejemplo n.º 5
0
def load_data():
    """Load the job names.

    Returns:
        list: the ``name`` of every document in ``lagou.jobs``.
    """
    jobs = connect('lagou', 'jobs')
    return [doc.get('name') for doc in jobs.find()]
Ejemplo n.º 6
0
def main():
    """Categorise the jobs in ``lagou.uncate`` by keyword, at most once each.

    Categories are tried in the iteration order of ``categories``. A job whose
    name contains one of the category's keywords is inserted into
    ``lagou.categories`` and its ID is remembered so that later categories
    skip it.
    """
    # A set keeps the "already categorised" check O(1); the original list
    # made it linear in the number of categorised jobs.
    # Assumes the IDs are hashable scalars -- TODO confirm.
    usedID = set()
    cursor = connect('lagou', 'uncate')
    cl = connect('lagou', 'categories')

    for category in categories.keys():
        keywords = categories.get(category)  # loop-invariant, looked up once

        for result in cursor.find():
            ID = result.get('ID')
            if ID in usedID:
                continue

            name = result.get('name')
            if any(c in name for c in keywords):
                usedID.add(ID)
                cl.insert({'name': name, 'ID': ID, 'category': category})
Ejemplo n.º 7
0
def main():
    """Predict a value for every location row and store it in MongoDB.

    Each row returned by the MySQL ``new_provices`` source is converted to a
    ``[lng, lat]`` point, ``get_neighbors`` is called with the training set
    and that point, and ``predict`` is applied to the neighbours. The row's
    location fields plus the prediction are inserted into ``lagou.predicts``.
    """
    trainSet = loadData()
    # NOTE(review): database credentials are hard-coded; consider loading
    # them from configuration or the environment.
    mysql = MySql('locations', 'leo', 'mm123456', collection='new_provices')
    try:
        cursor = connect('lagou', 'predicts')

        for result in mysql.find():
            lng = float(result.get('Longitude'))
            lat = float(result.get('latitude'))
            # Third argument is presumably the neighbour count (k=1).
            neighbors = get_neighbors(trainSet, [lng, lat], 1)
            cursor.insert({
                'province': result.get('Province'),
                'city': result.get('city'),
                'county': result.get('county'),
                'lng': lng,
                'lat': lat,
                'value': predict(neighbors),
            })
    finally:
        # Release the MySQL handle even when a row fails, e.g. float(None)
        # on a missing coordinate.
        mysql.close()
Ejemplo n.º 8
0
#获得城市不同发展阶段的公司所占比例
from utils.mongo import connect

jobs = connect('lagou', 'jobs')

# Alternative city set: ('北京', '上海', '广州', '深圳', '杭州')
cities = ('成都', '武汉', '南京', '西安', '长沙')  # cities to report on
counter = {}  # city -> {development stage -> number of companies}
company_ids = set()  # company IDs already seen, so each company counts once

for job in jobs.find():
    company_id = job.get('companyId')
    if company_id in company_ids:
        continue
    company_ids.add(company_id)

    company = job.get('company')
    city = job.get('location').replace(' ', '')
    dev = company.get('发展阶段').replace('发展阶段', '')

    if city not in cities:
        continue

    # Create the per-city dict on first sight AND count the current company.
    # The original only created the empty dict, so the first company of every
    # city was never counted.
    stages = counter.setdefault(city, {})
    stages[dev] = stages.get(dev, 0) + 1
Ejemplo n.º 9
0
"""获取每个省份的互联网公司数目
"""
from utils.mongo import connect

cursor = connect('lagou', 'location')
counter = {}  # province -> accumulated company_nums

for doc in cursor.find():
    province = doc.get('province')
    nums = doc.get('company_nums')

    if province in counter:
        counter[province] = counter[province] + nums
    else:
        counter[province] = nums

# Print one "{name: ..., value: ...}," entry per province.
for name, total in counter.items():
    print('{name:', "'", name, "'", ',', 'value:', total, '},')
Ejemplo n.º 10
0
"""统计各个分类下的人数
"""
from utils.mongo import connect

cursor = connect('lagou', 'categories')

# category -> number of documents carrying that category
counter = {}

for doc in cursor.find():
    category = doc.get('category')
    counter[category] = counter.get(category, 0) + 1

print(counter)
Ejemplo n.º 11
0
#互联网公司工作年限与对应年薪
from utils.mongo import connect

# Collections used by this script.
categories = connect('lagou', 'categories')
jobs = connect('lagou', 'jobs')
salarys = connect('lagou', 'salarys')

# Seven accumulator dicts; presumably one per job category, to be passed as
# ``d`` to count() -- verify against the rest of the script.
product = {}
design = {}
function = {}
market = {}
finance = {}
maintain = {}
tech = {}


def count(d, ID):
    """Tally experience requirement and annual salary for one position.

    Args:
        d: One of the seven accumulator dicts.
        ID: ID of the position.

    Postcondition (as stated by the original author): the dict holds, per
    experience level, a count, the annual salary and the average annual
    salary.

    NOTE(review): the visible body only increments ``count`` for an
    experience level that is already a key of ``d``; ``annual_salary`` is
    computed but not used, and there is no branch for a new level. The
    function may be truncated -- confirm against the full source.
    """
    year = jobs.find_one({'ID': ID}).get('experience')  # experience requirement
    salary = salarys.find_one({'ID': ID}).get('salary_avg')  # average monthly salary
    annual_salary = salary * 12  # annual salary

    if year in d.keys():
        d[year]['count'] += 1
Ejemplo n.º 12
0
"""获取公司的ID
    写入到数据库
"""
from utils import mongo

cursor = mongo.connect('lagou', 'jobs')

# Distinct company IDs referenced by the job documents.
results = cursor.find()
companys = {doc.get('companyId') for doc in results}

print(len(companys))

# Write one {'companyId': ...} document per distinct company.
c = mongo.connect('lagou', 'companys')
for company in companys:
    c.insert({'companyId': company})
Ejemplo n.º 13
0
from utils.mongo import connect
from utils.mysql import MySql

# NOTE(review): database credentials are hard-coded; consider loading them
# from configuration or the environment.
mysql = MySql('locations', 'leo', 'mm123456', collection='new_provices')

cities = []
try:
    # Connect inside the try block so the MySQL handle is closed even if the
    # MongoDB connection fails.
    locations = connect('lagou', 'location')

    for location in locations.find():
        city = location.get('city')
        # Trailing '%' with mohu=True is presumably a SQL LIKE prefix match.
        new_city = city + '%'
        result = mysql.find(mohu=True, city=new_city)
        try:
            province = result.get('Province')
        except Exception:
            # Lookup gave nothing usable: report the city and skip it.
            # Without the skip, the previous iteration's province would be
            # written to this city (or a NameError raised on the first row).
            print(city)
            continue
        locations.update_one({'city': city}, {'$set': {'province': province}})
finally:
    mysql.close()
Ejemplo n.º 14
0
"""This is to discover the connection
    between diploma and job
"""
from utils import mongo

jobs = mongo.connect('lagou', 'jobs')
backgrounds = mongo.connect('lagou', 'backgrounds')

# Data to be inserted into ``backgrounds``.
items = {}

# Counter.
count = 0

# How many diploma requirements there are in total.
diplomas = []


def main():
    """Count the jobs per ``background`` value and print the tally.

    The tally is accumulated in the module-level ``items`` dict.
    """
    for job in jobs.find():
        diploma = job.get('background')
        items[diploma] = items.get(diploma, 0) + 1

    print(items)


if __name__ == '__main__':
    main()