def write_results(results):
    """Persist scraped results to disk.

    Writes each result as one comma-joined CSV line to sent-list.csv and
    writes result[2] (presumably the URL field -- TODO confirm against the
    producer) to urls.txt, one per line.

    Args:
        results: iterable of sequences of strings; each must have at least
            3 elements.
    """
    root = app_env.get_app_root()
    # Context managers guarantee both files are flushed and closed even if
    # a write raises; the original leaked both handles on error.
    with open(root + "/xunguagua/sent-list.csv", encoding="UTF-8", mode="w") as sent_list_f, \
            open(root + "/xunguagua/urls.txt", encoding="UTF-8", mode="w") as urls_f:
        for result in results:
            sent_list_f.write(",".join(result) + "\n")
            urls_f.write(result[2] + "\n")
def get_instance(cls, logger_name):
    """Return a cached logger that writes to a rotating file.

    Loggers are memoized per name in ``cls.__loggers`` so that repeated
    calls do not attach duplicate handlers to the same logger.

    Args:
        logger_name: logger name; also used as the log file name under
            <app_root>/baidu_submit/logs.

    Returns:
        A ``logging.Logger`` at INFO level with a RotatingFileHandler
        (4 MiB per file, 10 backups, message-only format).
    """
    if logger_name not in cls.__loggers:
        logger = logging.getLogger(logger_name)
        logger.setLevel("INFO")
        log_file_path = app_env.get_app_root() + "/baidu_submit/logs"
        # exist_ok avoids the check-then-create race of the original
        # os.path.exists()/os.makedirs() pair.
        os.makedirs(log_file_path, exist_ok=True)
        fh = RotatingFileHandler(log_file_path + "/" + logger_name,
                                 maxBytes=4096 * 1024, backupCount=10)
        fmt = logging.Formatter('%(message)s')
        fh.setFormatter(fmt)
        logger.addHandler(fh)
        cls.__loggers[logger_name] = logger
    return cls.__loggers[logger_name]
def init_session():
    """Create a requests session for yun.xunguagua.com.

    Loads the saved login cookie from cookie.txt (GBK-encoded) and installs
    it together with browser-like headers on a fresh session.

    Returns:
        A ``requests.Session`` ready for authenticated requests.
    """
    # Read the cookie inside a context manager so the file handle is
    # closed; the original opened the file and never closed it.
    with open(app_env.get_app_root() + "/xunguagua/cookie.txt",
              encoding="GBK") as cookie_f:
        cookie = cookie_f.read().strip()
    sess = requests.Session()
    sess.headers.update({
        "Accept": "*/*",
        "Accept-Encoding": "gzip, deflate, sdch",
        "Accept-Language": "zh-CN,zh;q=0.8",
        "Connection": "keep-alive",
        "Host": "yun.xunguagua.com",
        "Referer": "http://yun.xunguagua.com/member/?app=member&controller=list",
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 SE 2.X MetaSr 1.0",
        "X-Requested-With": "XMLHttpRequest",
        "Cookie": cookie
    })
    return sess
# NOTE(review): sys.path is mutated between import groups so that app_env
# (one directory up from this package) resolves -- the import ordering is
# intentionally not PEP 8-grouped and must not be reordered.
import json
import os
import queue
import sys
import threading
import time

from baidu_submit.job import UrlSubmitJob

sys.path.append(os.path.split(os.path.abspath(os.path.dirname(__file__)))[0])

from baidu_submit.url_logger import UrlLogger
import app_env
import requests

# Cookie pool file (presumably one cookie per line -- TODO confirm against
# _refill_cookies, which is defined outside this chunk).
_COOKIE_FILE = app_env.get_app_root() + "/baidu_submit/cookie.txt"
# File recording cookies that have been marked invalid.
_COOKIE_FILE_INVALID = app_env.get_app_root(
) + "/baidu_submit/cookie-invalid.txt"
# Threshold after which a cookie is treated as expired -- TODO confirm the
# exact counting semantics in the consumer code (not visible here).
_COOKIE_EXPIRE_COUNT = 10
# Number of worker threads used for submission.
_THREAD_SIZE = 2
# Proxy-provider credentials loaded once at import time from proxy.json.
_PROXY_CONF = json.load(
    open(app_env.get_app_root() + "/baidu_submit/proxy.json", encoding="UTF-8"))
# URL template for fetching a fresh proxy IP; formatted with
# (spiderId, orderno) from _PROXY_CONF elsewhere in the file.
_PROXY_REPO = "http://www.xdaili.cn/ipagent/greatRecharge/getGreatIp?spiderId={}&orderno={}&returnType=2&count=1"


class BaiduSubmit:
    # Submits URLs to Baidu through rotating cookies and proxies.
    # NOTE(review): _refill_cookies/_change_proxy/_change_cookie are
    # defined elsewhere in this file and are not visible in this chunk.

    def __init__(self):
        self._refill_cookies()
        self._change_proxy()
        self._change_cookie()
        # Buffer of URLs awaiting submission.
        self._url_buffer = queue.Queue()
# NOTE(review): sys.path is mutated between import groups so that app_env
# (one directory up from this package) resolves -- the import ordering is
# intentionally not PEP 8-grouped and must not be reordered.
import json
import os
import queue
import random
import sys
import threading
import time

from baidu_submit.job import UrlSubmitJob

sys.path.append(os.path.split(os.path.abspath(os.path.dirname(__file__)))[0])

from baidu_submit.url_logger import UrlLogger
import app_env
import requests

# Cookie pool file (presumably one cookie per line -- TODO confirm against
# _refill_cookies, which is defined outside this chunk).
_COOKIE_FILE = app_env.get_app_root() + "/baidu_submit/cookie.txt"
# Threshold after which a cookie is treated as expired -- TODO confirm the
# exact counting semantics in the consumer code (not visible here).
_COOKIE_EXPIRE_COUNT = 10
# Number of worker threads used for submission.
_THREAD_SIZE = 2
# Proxy-provider credentials loaded once at import time from proxy.json.
_PROXY_CONF = json.load(
    open(app_env.get_app_root() + "/baidu_submit/proxy.json", encoding="UTF-8"))
# URL template for fetching a fresh proxy IP; formatted with
# (spiderId, orderno) from _PROXY_CONF elsewhere in the file.
_PROXY_REPO = "http://www.xdaili.cn/ipagent/greatRecharge/getGreatIp?spiderId={}&orderno={}&returnType=2&count=1"


class BaiduSubmit:
    # Submits URLs to Baidu through rotating cookies and proxies.
    # NOTE(review): _refill_cookies/_change_proxy/_change_cookie/
    # start_buffer_consumer are defined elsewhere in this file and are
    # not visible in this chunk.

    def __init__(self):
        self._refill_cookies()
        self._change_proxy()
        self._change_cookie()
        # Buffer of URLs awaiting submission; drained by the consumer
        # started below.
        self._url_buffer = queue.Queue()
        self.start_buffer_consumer()