def delete_photo(cookiefile, qqid, photo_json, sid):
    """Delete a previously uploaded photo from a Qzone album.

    The delete endpoint is chosen by the qqid's numeric range. The raw
    response (GBK decoded, re-encoded as UTF-8) is returned; a negative
    'code' in the embedded JSON is logged, and -3000 means the session
    needs a fresh login.
    """
    skey = get_cookie_value(cookiefile, "skey")
    gtk = get_gtk(skey)
    qqid = int(qqid)
    # Pick the URL template for this qqid range, then fill in the gtk token.
    if qqid > 2000000000:
        template = DELETE_URL
    elif qqid > 1100000000:
        template = DELETE_URL2
    elif qqid > 1000000000:
        template = DELETE_URL3
    elif qqid < 200000000:
        template = DELETE_URL2
    else:
        template = DELETE_URL4
    url = template % gtk
    data = generate_delete(qqid, photo_json['albumid'], photo_json['lloc'])
    logger.info("Deleting failed photo %s %s", qqid, url)
    result = post(url, data, cookiefile=cookiefile).decode('gbk').encode('utf8')
    logger.debug("Deleting %s result: %s", qqid, result)
    delete_json = extract_json_from_html(result, 'frameElement.callback')
    if delete_json['code'] < 0:
        logger.warn("Deleting %s failed %s code %s, %s",
                    qqid, delete_json['message'].encode('utf8'),
                    delete_json['code'], delete_json['subcode'])
        # -3000 is the server's "session expired" code.
        if delete_json['code'] == -3000:
            log_paperboy('Need login(del) xks %s' % sid)
    return result
def upload_photo2(cookiefile, full_filename, qqid, sid):
    """Upload a photo, trying several endpoints in preference order.

    The endpoint order depends on the qqid's numeric range. Each URL is
    tried in turn: a successful upload (no 'error' key) stops the loop,
    as does error -503 (login required, reported via log_paperboy);
    any other error falls through to the next URL.

    Returns the parsed 'data' dict of the last attempt — on failure it
    may contain 'error'/'msg' keys, and it is {} if no URL was tried.
    """
    skey = get_cookie_value(cookiefile, "skey")
    boundary = "----" + gen_boundary()
    qqid = int(qqid)
    # Preferred endpoint first, the others as fallbacks.
    if qqid > 2000000000:
        order = (UPLOAD_URL, UPLOAD_URL2, UPLOAD_URL3, UPLOAD_URL4)
    elif qqid > 1100000000:
        order = (UPLOAD_URL2, UPLOAD_URL, UPLOAD_URL3, UPLOAD_URL4)
    elif qqid > 1000000000:
        order = (UPLOAD_URL3, UPLOAD_URL2, UPLOAD_URL, UPLOAD_URL4)
    elif qqid < 200000000:
        order = (UPLOAD_URL2, UPLOAD_URL, UPLOAD_URL3, UPLOAD_URL4)
    else:
        order = (UPLOAD_URL4, UPLOAD_URL2, UPLOAD_URL3, UPLOAD_URL)
    urls = [template % boundary for template in order]
    photo_json = {}
    for url in urls:
        datagen, headers = generate_multipart_photo(qqid, skey,
                                                    full_filename, boundary)
        headers['User-Agent'] = DEFAULT_UA
        headers['Accept'] = 'text/x-json,application/json;q=0.9,*/*;q=0.8'
        headers['Accept-Language'] = 'en-US,en;q=0.5'
        logger.info("Uploading photo %s %s -> %s", qqid, full_filename, url)
        result = post(url, datagen, headers=headers, cookiefile=cookiefile,
                      is_accept_ending=True, ext_handlers=get_handlers())
        logger.debug("Uploaded %s %s -> %s : result %s",
                     qqid, full_filename, url, result)
        # Response is JSONP: _Callback({...}); strip the wrapper first.
        photo_json = simplejson.loads(
            result.replace("_Callback(", "").replace(");", ""))['data']
        # 'in' instead of deprecated dict.has_key() (removed in Python 3).
        if 'error' in photo_json:
            logger.warn("Post failed qq %s -> %s %s %s", qqid, url,
                        photo_json['error'], photo_json['msg'].encode('utf8'))
            if photo_json['error'] == -503:
                log_paperboy("Need login(photo) xks %s" % sid)
                break
        else:
            break
    return photo_json
def upload_photo(cookiefile, full_filename, qqid):
    """Upload a single photo to the endpoint matching the qqid range.

    Builds a multipart body bounded by a fresh boundary string, posts it
    with browser-like headers, and returns the raw response.
    """
    skey = get_cookie_value(cookiefile, "skey")
    boundary = "----" + gen_boundary()
    qqid = int(qqid)
    # Choose the URL template for this qqid range, then insert the boundary.
    if qqid > 2000000000:
        template = UPLOAD_URL
    elif qqid > 1100000000:
        template = UPLOAD_URL2
    elif qqid > 1000000000:
        template = UPLOAD_URL3
    elif qqid < 200000000:
        template = UPLOAD_URL2
    else:
        template = UPLOAD_URL4
    url = template % boundary
    datagen, headers = generate_multipart_photo(qqid, skey,
                                                full_filename, boundary)
    headers['User-Agent'] = DEFAULT_UA
    headers['Accept'] = 'text/x-json,application/json;q=0.9,*/*;q=0.8'
    headers['Accept-Language'] = 'en-US,en;q=0.5'
    logger.info("Uploading photo %s %s -> %s", qqid, full_filename, url)
    result = post(url, datagen, headers=headers, cookiefile=cookiefile,
                  is_accept_ending=True, ext_handlers=get_handlers())
    logger.debug("Uploaded %s %s -> %s : result %s",
                 qqid, full_filename, url, result)
    return result
def upload_photo2(cookiefile, full_filename, qqid, sid):
    """Upload a photo, trying several endpoints in preference order.

    The endpoint order depends on the qqid's numeric range. Each URL is
    tried in turn: a successful upload (no 'error' key) stops the loop,
    as does error -503 (login required, reported via log_paperboy);
    any other error falls through to the next URL.

    Returns the parsed 'data' dict of the last attempt — on failure it
    may contain 'error'/'msg' keys, and it is {} if no URL was tried.
    """
    skey = get_cookie_value(cookiefile, "skey")
    boundary = "----" + gen_boundary()
    qqid = int(qqid)
    # Preferred endpoint first, the others as fallbacks.
    if qqid > 2000000000:
        order = (UPLOAD_URL, UPLOAD_URL2, UPLOAD_URL3, UPLOAD_URL4)
    elif qqid > 1100000000:
        order = (UPLOAD_URL2, UPLOAD_URL, UPLOAD_URL3, UPLOAD_URL4)
    elif qqid > 1000000000:
        order = (UPLOAD_URL3, UPLOAD_URL2, UPLOAD_URL, UPLOAD_URL4)
    elif qqid < 200000000:
        order = (UPLOAD_URL2, UPLOAD_URL, UPLOAD_URL3, UPLOAD_URL4)
    else:
        order = (UPLOAD_URL4, UPLOAD_URL2, UPLOAD_URL3, UPLOAD_URL)
    urls = [template % boundary for template in order]
    photo_json = {}
    for url in urls:
        datagen, headers = generate_multipart_photo(qqid, skey,
                                                    full_filename, boundary)
        headers['User-Agent'] = DEFAULT_UA
        headers['Accept'] = 'text/x-json,application/json;q=0.9,*/*;q=0.8'
        headers['Accept-Language'] = 'en-US,en;q=0.5'
        logger.info("Uploading photo %s %s -> %s", qqid, full_filename, url)
        result = post(url, datagen, headers=headers, cookiefile=cookiefile,
                      is_accept_ending=True, ext_handlers=get_handlers())
        logger.debug("Uploaded %s %s -> %s : result %s",
                     qqid, full_filename, url, result)
        # Response is JSONP: _Callback({...}); strip the wrapper first.
        photo_json = simplejson.loads(
            result.replace("_Callback(", "").replace(");", ""))['data']
        # 'in' instead of deprecated dict.has_key() (removed in Python 3).
        if 'error' in photo_json:
            logger.warn("Post failed qq %s -> %s %s %s", qqid, url,
                        photo_json['error'], photo_json['msg'].encode('utf8'))
            if photo_json['error'] == -503:
                log_paperboy("Need login(photo) xks %s" % sid)
                break
        else:
            break
    return photo_json
def crawl(): company_id = 19 url = "https://efinance.cmbchinaucs.com/Handler/ActionPage.aspx?targetAction=GetProjectList_Index" headers = { 'Host': "efinance.cmbchinaucs.com", 'Connection': "keep-alive", 'Content-Length': "33", 'Cache-Control': "max-age=0", 'Accept': "text/plain, */*", 'Origin': "https://efinance.cmbchinaucs.com", 'X-Requested-With': "XMLHttpRequest", 'User-Agent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.101 Safari/537.36", 'Content-Type': "application/x-www-form-urlencoded", 'Referer': "https://efinance.cmbchinaucs.com/", 'Accept-Encoding': "gzip,deflate", 'Accept-Language': "zh-CN,zh;q=0.8,en;q=0.6", 'Cookie': "ASP.NET_SessionId=woqbxpemqp3kk4syvfbkxtzw" } db = get_db_engine() db_ids = list( db.execute( "select original_id from loan where company_id=%s and status=0", company_id)) # db all db_ids_set = set() # 在线的所有id online_ids_set = set() # new new_ids_set = set() # update update_ids_set = set() for id in db_ids: db_ids_set.add(id[0].encode("utf-8")) # debug if FLAGS.debug_parser: import pdb pdb.set_trace() try: loan_htm = post(url, data={"targetAction": "GetProjectList_Index"}, headers=headers) loans_json = loads(loan_htm, encoding="UTF-8") print loans_json except: logger.error("url: %s xpath failed:%s", url, traceback.format_exc())
def post_content(cookiefile, qqid, content, albumid, photoid, photo_width, photo_height, special_url=None, schedule_ts=0):
    """Post a shuoshuo entry referencing an uploaded photo.

    A non-zero schedule_ts routes the request through the timer
    (scheduled-post) endpoint instead of the immediate one. Returns the
    raw response from the POST.
    """
    skey = get_cookie_value(cookiefile, "skey")
    gtk = get_gtk(skey)
    # Scheduled posts use a different endpoint than immediate ones.
    endpoint = SHUOSHUO_TIMER_URL if schedule_ts else SHUOSHUO_URL
    url = endpoint % gtk
    data = generate_content(qqid, content, albumid, photoid,
                            photo_width, photo_height,
                            special_url, schedule_ts)
    logger.info("Posting content %s with photo", qqid)
    result = post(url, data, cookiefile=cookiefile)
    logger.debug("Posting %s result: %s, timer %s", qqid, result, schedule_ts)
    return result
def crawl(): company_id = 19 url = "https://efinance.cmbchinaucs.com/Handler/ActionPage.aspx?targetAction=GetProjectList_Index" headers = {'Host': "efinance.cmbchinaucs.com", 'Connection': "keep-alive", 'Content-Length': "33", 'Cache-Control': "max-age=0", 'Accept': "text/plain, */*", 'Origin': "https://efinance.cmbchinaucs.com", 'X-Requested-With': "XMLHttpRequest", 'User-Agent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.101 Safari/537.36", 'Content-Type': "application/x-www-form-urlencoded", 'Referer': "https://efinance.cmbchinaucs.com/", 'Accept-Encoding': "gzip,deflate", 'Accept-Language': "zh-CN,zh;q=0.8,en;q=0.6", 'Cookie': "ASP.NET_SessionId=woqbxpemqp3kk4syvfbkxtzw"} db = get_db_engine() db_ids = list(db.execute("select original_id from loan where company_id=%s and status=0", company_id)) # db all db_ids_set = set() # 在线的所有id online_ids_set = set() # new new_ids_set = set() # update update_ids_set = set() for id in db_ids: db_ids_set.add(id[0].encode("utf-8")) # debug if FLAGS.debug_parser: import pdb pdb.set_trace() try: loan_htm = post(url, data={"targetAction": "GetProjectList_Index"}, headers=headers) loans_json = loads(loan_htm, encoding="UTF-8") print loans_json except: logger.error("url: %s xpath failed:%s", url, traceback.format_exc())