Beispiel #1
0
def Get_html(url):
    """
    Fetch ``url`` with a GET request and return the raw response body.

    :param url: address of the page to download
    :return: ``response.content`` when the status code is 200; otherwise the
        failure is logged and ``None`` is returned
    """
    response = requests.get(url)
    status = response.status_code
    if status == 200:
        return response.content
    log.error("网址(%s)无法访问,状态码:%d" % (url, status))
    return None
Beispiel #2
0
def Get_html(url):
    """
    Download the page at ``url`` and hand back its undecoded body.

    :param url: address of the page to request
    :return: the raw body (``content`` of the response) for a 200 answer,
        ``None`` for any other status code (which is logged as an error)
    """
    page = requests.get(url)
    if page.status_code != 200:
        log.error("网址(%s)无法访问,状态码:%d" % (url, page.status_code))
        return None
    return page.content
def get_html_t(url, coding="utf-8"):
    """
    Download ``url`` and return its body decoded to text.

    The encoding is taken from the first ``charset=<name>"`` declaration found
    in the body; when there is none, ``coding`` is used instead.  A non-200
    status is logged, but the body is still decoded and returned.

    :param url: address of the page to download
    :param coding: fallback encoding used when the page declares no charset
    :return: the response body as text
    """
    response = requests.get(url)
    if response.status_code != 200:
        log.error("网址(%s)无法访问,状态码:%d" % (url, response.status_code))

    # Only the ASCII charset declaration is searched for, so the plain str()
    # rendering of the raw body is good enough here.
    content = str(response.content)
    # Raw string: "\w" is not a valid escape in a normal string literal.
    # "-" is accepted so that names such as utf-8 or iso-8859-1 are recognised.
    declared = re.search(r'charset=([\w-]+)"', content)
    if declared:
        coding = declared.group(1).lower()
    response.encoding = coding
    return response.text
def get_html_t(url, coding="utf-8"):
    """
    Fetch ``url`` and return the page as decoded text.

    The first ``charset=<name>"`` declaration in the body selects the encoding;
    ``coding`` is the fallback when no declaration is found.  A status other
    than 200 is logged, yet the body is decoded and returned all the same.

    :param url: address of the page to download
    :param coding: encoding to use when the page does not declare one
    :return: the response body as text
    """
    response = requests.get(url)
    if response.status_code != 200:
        log.error("网址(%s)无法访问,状态码:%d" % (url, response.status_code))

    # The charset declaration is pure ASCII, so searching the str() rendering
    # of the raw body is sufficient.
    content = str(response.content)
    # A raw string avoids the invalid "\w" escape, and allowing "-" lets
    # hyphenated names (utf-8, iso-8859-1, ...) match instead of being ignored.
    match = re.search(r'charset=([\w-]+)"', content)
    if match:
        coding = match.group(1).lower()
    response.encoding = coding
    return response.text
def __send_poster(open_id, poster_url):
    """
    Turn the poster page into an image and send it to the user.

    :param open_id: openid of the user who receives the poster
    :param poster_url: address passed to ``html_change_to_png``
    :return: always ``None``; when no image could be produced the failure is
        logged and nothing is sent
    """
    image_url = html_change_to_png(poster_url)
    if image_url is not None:
        __send_message_use_api(open_id, "<img style='width: 200px;' src='%s'>" % image_url)
        # Tell the WeChat auto-reply system that the card poster was updated
        tell_api_build_card(open_id, image_url)
        return None

    # Generation failed: only log it (the user is not notified)
    log.error(u'生成个人海报失败,用户openid:%s' % open_id)
    return None
Beispiel #6
0
def Get_html(url):
    """
    Download a page and return its HTML re-encoded as UTF-8.

    :param url: address of the page to download
    :return: the body decoded as gb2312 (undecodable bytes are dropped) and
        encoded again as utf8; ``None`` when the status code is not 200
    """
    response = requests.get(url)
    if response.status_code != 200:
        log.error("网址(%s)无法访问,状态码:%d" % (url, response.status_code))
        return None
    # Decode from gb2312, ignoring bytes that cannot be decoded, then encode as utf8
    text = response.content.decode('gb2312', 'ignore')
    return text.encode('utf8')
Beispiel #7
0
def tell_api_build_card(open_id, pic_url):
    """
    Tell the API endpoint that a poster has been generated.

    Posts the key ``'poster_image_' + open_id`` with ``pic_url`` as its value.

    :param open_id: openid of the user the poster belongs to
    :param pic_url: address of the generated poster image
    :return: ``None``
    :raises Exception: when the endpoint answers with a status other than 200
        (the failure is logged first)
    """
    payload = {
        'key': 'poster_image_' + open_id,
        'value': pic_url,
    }
    reply = requests.post('http://wechatsys.lingnanchuangye.com/set_key_data/frefer45g4re514re', data=payload)
    if reply.status_code == 200:
        return
    log.error(u'通知微信公众号生成了新海报操作失败,用户openid:%s' % open_id)
    raise Exception('error!')
Beispiel #8
0
def tell_api_build_card(open_id, pic_url):
    """
    Notify the API endpoint that we produced a new poster.

    The poster address is stored under the key ``'poster_image_' + open_id``.

    :param open_id: openid of the user the poster belongs to
    :param pic_url: address of the generated poster image
    :return: ``None``
    :raises Exception: after logging, when the response status is not 200
    """
    endpoint = 'http://wechatsys.lingnanchuangye.com/set_key_data/frefer45g4re514re'
    form = {'key': 'poster_image_' + open_id, 'value': pic_url}
    result = requests.post(endpoint, data=form)
    if result.status_code != 200:
        log.error(u'通知微信公众号生成了新海报操作失败,用户openid:%s' % open_id)
        raise Exception('error!')
def get_all_page_of_job():
    """
    Fetch the listing page at ``first_url`` and process every entry on it.

    The page is decoded as gbk, split into entries by
    ``get_tile_and_link_lists`` and each entry is passed through
    ``handle_job_message``.

    :return: list of processed entries, or ``None`` when the listing page does
        not answer with status 200 (which is logged)
    """
    listing = requests.get(first_url)
    if listing.status_code != 200:
        log.error("网址(%s)无法访问,状态码:%d" % (first_url, listing.status_code))
        return None
    listing.encoding = 'gbk'
    entries = get_tile_and_link_lists(listing.text)
    return [handle_job_message(entry) for entry in entries]
def get_all_page_of_job():
    """
    Download the job listing at ``first_url`` and return its processed entries.

    The response is read as gbk text, ``get_tile_and_link_lists`` extracts the
    entries and ``handle_job_message`` is applied to each of them.

    :return: list of processed entries; ``None`` (after logging) when the
        status code is not 200
    """
    page = requests.get(first_url)
    status = page.status_code
    if status != 200:
        log.error("网址(%s)无法访问,状态码:%d" % (first_url, status))
        return None
    page.encoding = 'gbk'
    return [handle_job_message(item) for item in get_tile_and_link_lists(page.text)]
Beispiel #11
0
def handle_job_message(obj):
    """
    Process one part-time job entry by downloading its detail page.

    ``tools.sleep_some_time()`` is called before the request.  On success the
    keys ``web_html``, ``company``, ``position`` and ``work_city`` are filled
    in from the downloaded page.

    :param obj: mapping describing the job; its ``web_url`` key is requested
    :return: the same ``obj``; left without the extra keys when the page does
        not answer with status 200 (which is logged)
    """
    tools.sleep_some_time()
    detail_url = obj['web_url']
    response = requests.get(detail_url)
    if response.status_code != 200:
        log.error("网址(%s)无法访问,状态码:%d" % (detail_url, response.status_code))
        return obj

    page = response.content
    obj['web_html'] = page
    obj['company'] = tools.get_company_name(page)
    obj['position'] = tools.get_work_position(page)
    obj['work_city'] = tools.get_work_citys(page)
    return obj
Beispiel #12
0
def handle_job_message(obj):
    """
    Enrich a part-time job entry with data taken from its detail page.

    Calls ``tools.sleep_some_time()`` first, then requests ``obj['web_url']``.
    When the page is available, ``web_html``, ``company``, ``position`` and
    ``work_city`` are stored on ``obj``.

    :param obj: mapping describing the job; must contain ``web_url``
    :return: ``obj`` itself, unchanged if the status code is not 200 (the
        failure is logged)
    """
    tools.sleep_some_time()
    link = obj['web_url']
    reply = requests.get(link)
    status = reply.status_code
    if status != 200:
        log.error("网址(%s)无法访问,状态码:%d" % (link, status))
        return obj

    raw_html = reply.content
    obj['web_html'] = raw_html
    obj['company'] = tools.get_company_name(raw_html)
    obj['position'] = tools.get_work_position(raw_html)
    obj['work_city'] = tools.get_work_citys(raw_html)
    return obj
Beispiel #13
0
def get_all_page_html():
    """
    Collect the entries of several result pages, starting at ``first_html_url``.

    The first page is fetched with GET; the following pages (2 to 49) are
    requested by posting the search form back with ``__EVENTTARGET`` pointing
    at the pager control.  Paging stops as soon as a page yields fewer than 10
    entries.  Every collected entry is passed through ``handle_job_message``.

    :return: list of processed entries, or ``None`` when the first page does
        not answer with status 200 or has an empty body
    """
    # Fetch the first page
    first_page = requests.get(first_html_url)
    if first_page.status_code != 200:
        return None

    html = first_page.content
    if not html:
        return None

    messages = get_message_title_and_url_list(html)
    form_data = get_search_form(html)

    for page in range(2, 50):
        form_data['__EVENTTARGET'] = 'ctl00$cph_content_temp$DataPager1$ctl01$ctl0%d' % (page - 1)
        reply = requests.post(first_html_url, form_data)

        if reply.status_code != 200:
            # Log the failure and try the next page with the form we already have
            log.error("网址(%s)无法访问,状态码:%d" % (first_html_url, reply.status_code))
            continue

        # Refresh the hidden form fields from the page just received
        html = reply.content
        form_data = get_search_form(html)

        batch = get_message_title_and_url_list(html)
        messages.extend(batch)

        # A short (or empty) page means there is nothing left to fetch
        if len(batch) < 10:
            break

    return [handle_job_message(message) for message in messages]
Beispiel #14
0
def get_all_page_of_work():
    """
    Fetch the listing pages 1 to 5 and process every entry found on them.

    Each page address is ``web_url`` followed by the page number.  Paging stops
    early when a page yields fewer than 15 entries.  A page that does not
    answer with status 200 is logged and skipped.

    :return: list with the result of ``handle_job_message`` for every entry
    """
    messages = []
    for page in range(1, 6):
        url = web_url + str(page)
        response = requests.get(url)
        if response.status_code != 200:
            log.error("网址(%s)无法访问,状态码:%d" % (url, response.status_code))
            # Move on to the next page: retrying the same page without
            # advancing would loop forever while the site keeps failing.
            continue
        objs = get_message_title_and_url_list(response.content)
        messages.extend(objs)

        # A short page is the last one
        if len(objs) < 15:
            break
    return [handle_job_message(message) for message in messages]
Beispiel #15
0
def get_all_jobs():
    """
    Fetch the most recent job postings from listing pages 1 to 5.

    Each page address is built with ``message_url % page`` and the response is
    decoded as gbk.  Paging stops early when a page yields fewer than 18
    entries.  A page that does not answer with status 200 is logged and
    skipped.

    :return: list with the result of ``handle_job_message`` for every entry
    """
    messages = []
    for page in range(1, 6):
        url = message_url % page
        response = requests.get(url)
        if response.status_code != 200:
            log.error("网址(%s)无法访问,状态码:%d" % (url, response.status_code))
            # Move on to the next page: retrying the same page without
            # advancing would loop forever while the site keeps failing.
            continue
        response.encoding = 'gbk'
        objs = get_title_and_link_list(response.text)
        messages.extend(objs)
        # A short page is the last one
        if len(objs) < 18:
            break
    return [handle_job_message(message) for message in messages]
Beispiel #16
0
def get_all_page_of_work():
    """
    Collect and process the entries of listing pages 1 to 5.

    The address of a page is ``web_url`` with the page number appended.  The
    loop ends early once a page returns fewer than 15 entries.  Pages that do
    not answer with status 200 are logged and skipped.

    :return: list with the result of ``handle_job_message`` for every entry
    """
    messages = []
    for page in range(1, 6):
        url = web_url + str(page)
        response = requests.get(url)
        if response.status_code != 200:
            log.error("网址(%s)无法访问,状态码:%d" % (url, response.status_code))
            # Skip to the next page; staying on the failing page would never
            # terminate because the page counter would not advance.
            continue
        objs = get_message_title_and_url_list(response.content)
        messages.extend(objs)

        # Fewer than a full page of entries: nothing more to fetch
        if len(objs) < 15:
            break
    return [handle_job_message(message) for message in messages]
Beispiel #17
0
def add_a_job(job_title, job_company, job_url, job_city, job_message_source,
              job_position, job_release_time, web_html):
    """
    Insert one job posting into the ``jobs`` table.

    :param job_title: title of the job
    :param job_company: company name; its UTF-8 MD5 hex digest is stored as ``token``
    :param job_url: address of the page the posting links to
    :param job_city: work location; a list is stored joined with ``#``
    :param job_message_source: source of the posting
    :param job_position: job position; a list is stored joined with ``#``
    :param job_release_time: release time, normalised with ``tools.get_real_time``
    :param web_html: html of the scraped page
    :return: the value returned by ``db_lib.insert`` on success, ``0`` when an
        ``IntegrityError`` is raised, ``-1`` (after logging) on any other error
    """
    # isinstance (rather than an exact type comparison) also joins list subclasses
    if isinstance(job_city, list):
        job_city = '#'.join(job_city)
    if isinstance(job_position, list):
        job_position = '#'.join(job_position)
    token = hashlib.md5(job_company.encode("UTF-8")).hexdigest()
    job_release_time = tools.get_real_time(job_release_time)
    sql = """
        insert into jobs(title, company, position, web_url, work_city, message_source, job_type,
        authentication, status, web_html, release_time, token, create_time)
        values(%s, %s, %s, %s, %s, %s, 0, 0, 0, %s, %s, %s, now());
    """

    try:
        insert_id = db_lib.insert(sql, [
            job_title, job_company, job_position, job_url, job_city,
            job_message_source, web_html, job_release_time, token
        ])
        add_company(job_company)
        return insert_id
    except IntegrityError:
        # Constraint violation (presumably a duplicate posting): not an error
        return 0
    except Exception as error:
        log.error("写入数据库失败(错误类型:%s), 信息地址:%s" % (str(error), job_url))
        return -1
Beispiel #18
0
def get_all_page_of_job():
    """
    Fetch the most recent job postings.

    Only the first listing page is requested.  Its address is built by
    substituting the entry offset ``(page - 1) * 20`` into ``message_url``.
    A page that does not answer with status 200 is logged and skipped.

    :return: list with the result of ``handle_job_message`` for every entry
    """
    max_page = 1    # number of listing pages to request
    page_size = 20  # entries per full page, also the offset step

    messages = []
    for page in range(1, max_page + 1):
        # The offset must be computed before formatting: "%" and "*" share the
        # same precedence, so without the parentheses the formatted URL string
        # itself would be repeated 20 times.
        url = message_url % ((page - 1) * page_size)
        response = requests.get(url)
        if response.status_code != 200:
            log.error("网址(%s)无法访问,状态码:%d" % (url, response.status_code))
            # Move on instead of retrying the same page forever
            continue
        objs = get_title_and_link_list(response.content)
        messages.extend(objs)

        # A short page is the last one
        if len(objs) < page_size:
            break
    return [handle_job_message(message) for message in messages]
Beispiel #19
0
def get_all_page_of_job():
    """
    Collect the most recent job postings.

    A single listing page is requested; its address is ``message_url`` with
    the entry offset ``(page - 1) * 20`` substituted in.  A page that does not
    answer with status 200 is logged and skipped.

    :return: list with the result of ``handle_job_message`` for every entry
    """
    last_page = 1         # only the first listing page is fetched
    entries_per_page = 20

    messages = []
    for page in range(1, last_page + 1):
        # Parenthesise the offset: "message_url % (page - 1) * 20" formats
        # first and then repeats the resulting string 20 times.
        offset = (page - 1) * entries_per_page
        url = message_url % offset
        response = requests.get(url)
        if response.status_code != 200:
            log.error("网址(%s)无法访问,状态码:%d" % (url, response.status_code))
            # Advance to the next page; retrying without advancing never ends
            continue
        objs = get_title_and_link_list(response.content)
        messages.extend(objs)

        # Fewer than a full page of entries: nothing more to fetch
        if len(objs) < entries_per_page:
            break
    return [handle_job_message(message) for message in messages]
def get_html(url):
    """
    Request ``url`` and return the raw response body.

    A status code other than 200 is logged as an error, but the body is
    returned in that case as well.

    :param url: address of the page to download
    :return: ``content`` of the response
    """
    page = requests.get(url)
    status = page.status_code
    if status != 200:
        log.error("网址(%s)无法访问,状态码:%d" % (url, status))
    return page.content
def get_html(url):
    """
    Download ``url`` and give back the undecoded body.

    The body is returned whatever the status code is; any status other than
    200 is additionally logged as an error.

    :param url: address of the page to download
    :return: ``content`` of the response
    """
    reply = requests.get(url)
    if not reply.status_code == 200:
        log.error("网址(%s)无法访问,状态码:%d" % (url, reply.status_code))
    body = reply.content
    return body