def main():
    """Entry point: log in for session cookies, then crawl the user list.

    Credentials are intentionally blank placeholders — fill them in before
    running.
    """
    account = ''  # user name
    secret = ''   # password
    request_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:58.0) Gecko/20100101 Firefox/58.0'
    }
    session_cookies = get_cookies(account, secret)
    # Crawl up to 10000 users in one run.
    crawler = UserListSpider(request_headers, session_cookies)
    crawler.start(10000)
def main():
    """Entry point: log in, build the three worker threads, start the checker.

    Only the Check thread is started here; presumably it coordinates (and
    starts) the GetUsers / GetTopic workers itself — confirm against the
    Check implementation.
    """
    account = ''  # user name
    secret = ''   # password
    request_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:58.0) Gecko/20100101 Firefox/58.0'
    }
    session_cookies = get_cookies(account, secret)
    users_worker = GetUsers('get_users_thread', request_headers, session_cookies)
    topic_worker = GetTopic('get_topic_thread', request_headers, session_cookies)
    checker = Check('check_thread', users_worker, topic_worker, 40000,
                    request_headers, session_cookies)
    checker.start()
def main():
    """Entry point: resume the user-topic crawl from the persisted offset.

    If the checkpoint file exists, its first line holds the integer position
    to resume from; otherwise the crawl starts at 0.
    """
    account = ''  # user name
    secret = ''   # password
    request_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:58.0) Gecko/20100101 Firefox/58.0'
    }
    session_cookies = get_cookies(account, secret)
    # NOTE(review): "uft" in the filename looks like a typo for "utf", but the
    # writer side must use the same name — keep it unchanged.
    checkpoint = 'data/end_position_uft.txt'
    position = 0
    if os.path.exists(checkpoint):
        with open(checkpoint, 'r') as f:
            position = int(f.readline().strip())
    crawler = UserTopicSpider(request_headers, session_cookies)
    crawler.start(position)
def cookies():
    """Thin wrapper: fetch and return fresh login cookies."""
    fetched = get_cookies()
    return fetched
# NOTE(review): this first section (through the `with open(...)` block) appears
# to be the tail of an enclosing function — likely forward_message — whose
# `def` line is outside the visible chunk; confirm against the full file.
exception = ""
try:
    # Parse the HTTP response; "code" is presumably Weibo's API status field —
    # verify against the API the caller hits.
    response_json = response.json()
    response_code = response_json["code"]
except Exception as ex:
    # NOTE(review): broad catch — any failure (bad JSON, missing key, network
    # object misuse) is collapsed into sentinel code 100001 and logged below.
    exception = ex
    response_code = 100001
# Append one log record per message: the mid, the status code, and any
# captured exception (empty string when the parse succeeded).
with open("weibo_log", "a+") as f:
    f.write(mid + "\n")
    f.write(str(response_code) + "\n")
    f.write("Exception:" + str(exception) + "\n")


if __name__ == '__main__':
    # Fetch the list of message ids to forward, then log in once.
    mids = get_weibo_list()
    cookies = login.get_cookies(user_name, pass_word)
    random.seed()
    # Random + timestamp suffix so each forwarded "reason" text is unique.
    random_string = str(random.random()) + str(
        time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))
    # post_new_message("test 1.0" + str(random_string))
    for mid in mids:
        # Throttle: one forward per minute to avoid tripping rate limits.
        time.sleep(60)
        forward_message(mid=mid, reason="reason" + str(random_string))
from config import headers
from logger import crawler
from login import get_cookies
from db.dao import LoginInfoOper
from utils import (send_email, getip)
from db.redis_db import (Urls, Cookies)
from page_parse import (is_403, is_404, is_complete)
from decorators import (timeout_decorator, timeout)
from config import (get_timeout, get_crawl_interal, get_excp_interal, get_max_retries)

# Crawl tuning pulled from config once at import time.
TIME_OUT = get_timeout()            # per-request timeout
INTERAL = get_crawl_interal()       # delay between successful requests
MAX_RETRIES = get_max_retries()     # retry budget per URL
EXCP_INTERAL = get_excp_interal()   # delay after an exception
COOKIES = get_cookies()             # shared login cookies for all requests

# Instead of disable warning, why not use it as docs suggested
# https://stackoverflow.com/questions/42982143/python-requests-how-to-use-system-ca-certificates-debian-ubuntu
# Point requests at the system CA bundle (Debian/Ubuntu path) so TLS
# verification works without disabling warnings.
# NOTE(review): `os` is used here but no `import os` is visible in this chunk —
# confirm it is imported earlier in the file.
os.environ['REQUESTS_CA_BUNDLE'] = os.path.join(os.sep, '/etc/ssl/certs', 'ca-certificates.crt')


def is_banned(url):
    """Return True if the (possibly redirected) URL indicates the account or
    IP has been blocked/frozen by Weibo, judged by known marker substrings."""
    if 'unfreeze' in url or 'accessdeny' in url or 'userblock' in url or 'verifybmobile' in url:
        return True
    return False


# NOTE(review): these decorators apply to a function definition that lies
# beyond this visible chunk.
@timeout(200)
@timeout_decorator
from utils import (send_email, getip)
from db.redis_db import (
    Urls, Cookies)
from page_parse import (
    is_403, is_404, is_complete)
from decorators import (
    timeout_decorator, timeout)
from config import (
    get_timeout, get_crawl_interal, get_excp_interal, get_max_retries)

# Crawl tuning pulled from config once at import time.
TIME_OUT = get_timeout()            # per-request timeout
INTERAL = get_crawl_interal()       # delay between successful requests
MAX_RETRIES = get_max_retries()     # retry budget per URL
EXCP_INTERAL = get_excp_interal()   # delay after an exception
COOKIES = get_cookies()             # shared login cookies for all requests
                                    # NOTE(review): get_cookies is not imported
                                    # in this visible chunk — confirm it is
                                    # imported earlier in the file.

# Disable annoying InsecureRequestWarning
# NOTE(review): `requests` / `InsecureRequestWarning` are not imported in this
# visible chunk — confirm earlier imports.
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)


def is_banned(url):
    """Return True if the (possibly redirected) URL indicates the account or
    IP has been blocked/frozen by Weibo, judged by known marker substrings."""
    if 'unfreeze' in url or 'accessdeny' in url or 'userblock' in url or 'verifybmobile' in url:
        return True
    return False


# NOTE(review): get_page's body continues beyond this visible chunk.
@timeout(200)
@timeout_decorator
def get_page(url, auth_level=2, is_ajax=False, need_proxy=False):
__author__ = 'zhongjr'

import novel as nl
import login as lg

if __name__ == '__main__':
    # Novel title and chapter range to download; None means "through the end".
    title = '实用主义者的爱情'
    start_chapter = 110
    end_chapter = None
    # Site credentials — fill in before running.
    username = ''
    password = ''
    session = lg.get_cookies(username, password)
    book = nl.Novel(title, start_chapter, end_chapter, session)
    book.get_novel()