Beispiel #1
0
def zp_logger(d):
    """Save one scraped job posting and record its index in the zp log.

    ``d`` is a mapping of scraped fields.  The entries read here are
    ``zhiwei``, ``company`` and ``detail`` (objects exposing ``.text()``),
    ``imgs`` (optional sequence), ``request_info`` (mapping with a ``url``
    key) and ``index``.

    The assembled record is handed to ``controller.save_content`` and the
    value of ``d['index']`` is then written to the file named by
    ``config['zp']['log']``, replacing its previous contents.

    Returns ``None`` in every case.  When ``zhiwei`` is missing or falsy a
    message is printed and nothing is saved or logged.
    """
    position = d.get('zhiwei')
    if not position:
        # Parenthesised so the line also parses on Python 3; with a single
        # argument Python 2 prints exactly the same text.
        print('no data get')
        return

    position_text = position.text()
    company_text = d.get('company').text()

    # ``s`` is a two-slot format template defined outside this function.
    ss = s % (position_text, company_text)

    imgs = d.get('imgs')
    first_img = imgs[0] if imgs else None

    data = {
        'title': position_text + u' 招聘',
        'content': u'<h2>单位名称:%s</h2><p>职位介绍:<br>%s</p>%s' % (
            company_text, d.get('detail').text(), ss),
        # Slicing already returns the whole string when it is 35 characters
        # or shorter, so no length test is needed.
        'description': company_text[:35],
        # A missing/empty image list *or* a falsy first entry both fall back
        # to the placeholder image.
        'featured_image': (
            first_img or '/static/zhiyuw/cy_images/images/infor.jpg'),
        'cate': 24,
        'reference': d.get('request_info').get('url'),
    }
    controller.save_content(data)

    # Mode 'w': the file only ever holds the most recently saved index.
    with open(config['zp']['log'], 'w') as f:
        f.write('%s\n' % d.get('index'))
Beispiel #2
0
def zx_logger(d, cate=25):
    """Save one scraped article, localising its images, and log its index.

    ``d`` is a mapping of scraped fields.  The entries read here are
    ``title`` and ``content`` (objects exposing ``.text()``; ``content``
    also ``.html()``), ``imgs`` (optional iterable of elements exposing
    ``.attrib``), ``request_info`` (mapping with ``url``, ``proxy``,
    ``hostname`` and optionally ``port``) and ``index``.

    Each image with a ``src`` is downloaded through ``DownloadImage`` into
    ``<images_dir>/zx``.  On success the matching ``src="..."`` in the
    article HTML is pointed at the static copy; on failure a line is
    appended to the file named by ``config.get('log')``.

    The record is then passed to ``controller.save_content``.  Afterwards
    the index is written to ``config['zx']['log']`` when ``cate`` is 25, or
    its first three items joined by ``-`` to ``config['gyrc']['log']`` when
    ``cate`` is 26; any other ``cate`` writes no index file.

    Returns ``None`` in every case.  When ``title`` is missing or falsy a
    message is printed and nothing is saved or logged.
    """
    title = d.get('title')
    if not title:
        # Parenthesised so the line also parses on Python 3; with a single
        # argument Python 2 prints exactly the same text.
        print('no data get')
        return

    request_info = d.get('request_info')
    page_url = request_info.get('url')
    content = d.get('content')
    imgs = d.get('imgs')
    first_img = imgs[0] if imgs else None

    data = {
        'title': title.text(),
        'content': content.html(),
        # Slicing already returns the whole string when it is 35 characters
        # or shorter, so no length test is needed.
        'description': content.text()[:35],
        # A missing/empty image list *or* a falsy first entry both fall back
        # to the placeholder image.
        'featured_image': (
            first_img or '/static/zhiyuw/cy_images/images/infor.jpg'),
        'cate': cate,
        'reference': page_url,
    }

    images_dir = config.get('images_dir')
    downloader = DownloadImage(os.path.join(images_dir, 'zx'))

    # ``imgs`` may be absent (the fallback above already allows for that),
    # so never iterate over None.
    for img in imgs or []:
        raw_src = img.attrib.get('src')
        if not raw_src:
            # An <img> without a usable src has nothing to download.
            continue

        src = raw_src
        if not src.startswith('http'):
            # NOTE(review): the URL scheme is read from the 'proxy' key of
            # request_info -- confirm that key really holds the scheme.
            src = '%s://%s:%s%s' % (request_info.get('proxy'),
                                    request_info.get('hostname'),
                                    request_info.get('port', 80), src)

        img_path = downloader.download_image(src)
        if img_path:
            img_static_path = (
                config.get('static_dir') + img_path.replace(images_dir, ''))
            # Search for the attribute value as it appears in the markup
            # (raw_src); the absolute URL built above never occurs in the
            # HTML when the original src was relative.
            data['content'] = data['content'].replace(
                'src="%s"' % raw_src, 'src="%s"' % img_static_path)
        else:
            with open(config.get('log'), 'a') as f:
                # Trailing newline keeps appended entries on separate lines.
                f.write('Download Image Fail "%s" For url:%s\n'
                        % (src, page_url))

    controller.save_content(data)

    if cate == 25:
        with open(config['zx']['log'], 'w') as f:
            f.write('%s\n' % d.get('index'))
    elif cate == 26:
        with open(config['gyrc']['log'], 'w') as f:
            # tuple() lets %-formatting accept a list slice as well as a
            # tuple slice.
            f.write('%s-%s-%s\n' % tuple(d.get('index')[:3]))