Beispiel #1
0
    }
    </script>
    '''
    picId = urlsplit(url).path.split('/')[-1]
    userId = parse_qs(urlsplit(url).query)['u'][0]
    albumId = json.loads(soup.find('script', id='pix-json-set-info').string)['albumId']
    ajax_url = 'http://wantu.taobao.com/ajax/PicDetailAjax.do?picId=%s&userId=%s&albumId=%s&t=1365154666759&_method=read'
    ajax_url = ajax_url % (picId, userId, albumId)
    resp = json.loads(httplib.urlopen(ajax_url)[2].decode('gbk'))
    picture = resp['data']['models'][0]['picPath']
    description = httplib.html_unescape(resp['data']['models'][0]['desc'])
    return (picture, description)

# Module-level Gearman client used for pipeline work.
# It is created once at import time and connects to the job server
# configured in `config.job_server`.
gm_client = GearmanClient([config.job_server])
# Use JSONDataEncoder for job data -- presumably so payloads are
# JSON-serialized on the wire; confirm against the encoder's definition.
gm_client.data_encoder = JSONDataEncoder

def submit_html_job(url):
    """Queue `url` for the ``worker_process_html`` Gearman task.

    The job is submitted through the module-level ``gm_client`` as a
    background job, without waiting for completion, and with
    ``md5sum(url)`` passed as its ``unique`` key.

    Returns the ``unique`` attribute of the submitted request's job.
    """
    request = gm_client.submit_job(
        'worker_process_html',
        url,
        unique=md5sum(url),
        background=True,
        wait_until_complete=False
    )
    return request.job.unique

def submit_pic_job(url):
    """Queue `url` for the ``worker_process_pic`` Gearman task.

    The job is submitted through the module-level ``gm_client`` as a
    background job, without waiting for completion, and with
    ``md5sum(url)`` passed as its ``unique`` key.

    Returns the ``unique`` attribute of the submitted request's job.
    """
    request = gm_client.submit_job(
        'worker_process_pic',
        url,
        unique=md5sum(url),
        background=True,
        wait_until_complete=False
    )
    return request.job.unique

def worker_process_html(gearman_worker, gearman_job):
    url = gearman_job.data
Beispiel #2
0
def submit(url):
    """Submit `url` to the ``worker_process_html`` Gearman task.

    A new GearmanClient for ``config.job_server`` is created on every call
    and configured with JSONDataEncoder as its data encoder. The job is
    submitted with ``submit_job``'s default options.

    Prints "<url> is submitted" once the submit call returns.

    Returns the job request object returned by ``submit_job``.
    """
    gm_client = GearmanClient([config.job_server])
    gm_client.data_encoder = JSONDataEncoder
    job_req = gm_client.submit_job('worker_process_html', url)
    # Report the URL that was actually passed in, not a value read from
    # an outer namespace object. The message is built as a single string
    # so the output is the same under the Python 2 print statement and the
    # Python 3 print function.
    print('%s is submitted' % (url,))
    return job_req