def test_fail_bost(self, testdata):
    """Log in with the credentials from *testdata* and verify the page title.

    testdata supplies "username", "password" and the expected title "hope".
    """
    page = Login(self.driver)
    page.login(testdata["username"], testdata["password"])
    # Give the page a moment to settle before reading the title.
    time.sleep(3)
    expected = testdata["hope"]
    actual = page.get_title()
    self.assertEqual(actual, expected)
def get_categories_and_save_to_db():
    """Log in, scrape every category link from the categories table,
    persist each (id, name) pair to the database, and return the
    collected Category objects."""
    session = Login()
    session.login()
    anchors = driver.find_elements_by_xpath(
        "//table[@class='categories']//child::a")
    results = []
    for anchor in anchors:
        href = anchor.get_attribute('href')
        # The id is the value of the last query-string parameter in the href.
        cat_id = int(href.split('?')[-1].split("=")[-1])
        cat_name = anchor.get_attribute('text')
        results.append(Category(cat_name, cat_id))
        database.insert_category(cat_id, cat_name)
    return results
def __init__(self):
    """Restore a saved cookie session (logging in again when stale) and
    configure default encoding plus a local debugging proxy.

    Side effects: mutates process-wide default encoding via
    sys.setdefaultencoding (Python 2 idiom).
    """
    http_client.HTTPConnection.debuglevel = 0
    try:
        Logging.info("geting cookies")
        self.requests = requests.Session()
        self.requests.cookies = cookielib.LWPCookieJar("zhihu_api/cookies")
        self.cookies = self.requests.cookies
        self.requests.cookies.load(ignore_discard=True)
        Login.islogin()
    except NotLogin:
        # Cookies loaded but the session is stale -- authenticate again.
        Login.login()
    except Exception:
        # FIX: narrowed from a bare ``except:`` so KeyboardInterrupt and
        # SystemExit are no longer swallowed; typically reached when the
        # cookie file cannot be loaded.
        Logging.error(u"找不到cookie")
    reload(sys)
    sys.setdefaultencoding("utf8")
    Logging.info("Default encoding: " + sys.getdefaultencoding())
    self.proxies = {"http": "http://127.0.0.1:8080",
                    "https": "http://127.0.0.1:8080"}
class Spider(object):
    """Recursively crawls GitHub follower pages starting from a seed user."""

    def __init__(self, email, password):
        self.headers = {
            'Referer': 'https://github.com/',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36',
            'Host': 'github.com'
        }
        self.email = email
        self.password = password
        self.foller_url = 'https://github.com/{name}?tab=followers'
        self.login = Login()
        self.session = self.login.login(email, password)
        self.set = set()

    def re_login(self):
        """Re-authenticate and refresh the HTTP session."""
        self.session = self.login.login(self.email, self.password)

    def is_login(self):
        """Return whether the current session is still authenticated."""
        return self.login.is_login()

    def get_follers(self, name):
        """Visit *name*'s follower page and recurse into each follower found."""
        if name in self.set:
            return
        print("find user! " + name)
        self.set.add(name)
        page = self.session.get(self.foller_url.format(name=name),
                                headers=self.headers)
        tree = etree.HTML(page.text)
        hrefs = tree.xpath('//a[@data-hovercard-type="user"]/@href')
        # Strip the leading slash from each profile href and de-duplicate.
        followers = list({href.replace("/", '') for href in hrefs})
        print("user name :" + name + " followers nums :" + str(len(followers)))
        for follower in followers:
            self.get_follers(follower)

    def run(self):
        """Kick off the crawl from the seed account."""
        self.get_follers("xiantang")
def __init__(self):
    """Restore a saved cookie session (logging in again when stale) and
    configure default encoding plus a local debugging proxy.

    Side effects: mutates process-wide default encoding via
    sys.setdefaultencoding (Python 2 idiom).
    """
    http_client.HTTPConnection.debuglevel = 0
    try:
        Logging.info('geting cookies')
        self.requests = requests.Session()
        self.requests.cookies = cookielib.LWPCookieJar('zhihu_api/cookies')
        self.cookies = self.requests.cookies
        self.requests.cookies.load(ignore_discard=True)
        Login.islogin()
    except NotLogin:
        # Cookies loaded but the session is stale -- authenticate again.
        Login.login()
    except Exception:
        # FIX: narrowed from a bare ``except:`` so KeyboardInterrupt and
        # SystemExit are no longer swallowed; typically reached when the
        # cookie file cannot be loaded.
        Logging.error(u"找不到cookie")
    reload(sys)
    sys.setdefaultencoding('utf8')
    Logging.info("Default encoding: " + sys.getdefaultencoding())
    self.proxies = {
        "http": "http://127.0.0.1:8080",
        "https": "http://127.0.0.1:8080"
    }
def nsuite():
    """Assemble the full regression suite in its required execution order."""
    suite = unittest.TestSuite()
    cases = [
        Signup.SignUp("test"),
        General.general(),
        Lists.lists(),
        Views.views(),
        Subtasks.subtasks(),
        Files.files(),
        Recurrence.recurrence(),
        Positioning.positioning(),
        DefaultFolder.default_folder(),
        Sharing.sharing(),
        ListSharing.list_sharing(),
        Login.login(),
    ]
    for case in cases:
        suite.addTest(case)
    return suite
from Login import Login
from Login import JsonSetting
import ybc_box as box

# Persistent user store backing the login/registration dialog.
js_setting = JsonSetting('user_data.json')
login = Login(js_setting)

# Prompt text shown on the choice dialog; updated after each failed attempt.
word = '欢迎进入迷宫游戏,请选择'
while True:
    dz_choice = box.buttonbox(
        word + '(按❌ / Cancel退出)',
        ['登录(请注意大小写)', '注册(不支持重复用户名)'])
    if dz_choice is None:
        # Dialog dismissed -- say goodbye and quit.
        box.msgbox('您已退出,回头见……!')
        exit()
    if dz_choice == '登录(请注意大小写)':
        accepted, name, money = login.login()
        if accepted:
            box.msgbox('欢迎您,{0},点击🆗马上开始游戏'.format(name))
            player_name = name
            player_dollar = int(money)
            break
        word = '很抱歉,登录失败,请重试'
    elif login.registered():
        word = '注册成功!!请登录进入游戏'
    else:
        word = '很抱歉,注册失败(可能重名了),请重试'
def test_login(self):
    """Exercise the login flow: two failure cases, then a successful login."""
    flow = Login(self)
    # Invalid username, invalid password, then the happy path.
    flow.test_bad_user()
    flow.test_bad_pass()
    flow.login()
# -*- coding: utf-8 -*-
import logging
import threading
from Login import Login, loopPool
from logConfig import setup_logging
import sys

reload(sys)  # Python 2.5 removes sys.setdefaultencoding after startup; reload restores it
sys.setdefaultencoding('utf-8')

logger = logging.getLogger(__name__)
setup_logging()

login = Login()
if login.login():
    # FIX: pass the callable itself, not loopPool() -- the original invoked
    # loopPool synchronously and handed its return value to Thread, so the
    # "thread" never ran the message loop.
    getMessageThread = threading.Thread(target=loopPool)
    getMessageThread.start()
class SpiderTopicData(object):
    """Crawls the Zhihu topic hierarchy, recording topic names and child->parent links."""

    def __init__(self):
        """Log in and capture the session, headers and POST payload used for crawling."""
        self.login = Login()
        self.login.login()
        self.headers = self.login.getHeaders()
        self.data = self.login.getData()
        self.session = self.login.getSession()
        # self.session.encoding = 'utf8'
        # print self.session.
        # data link list
        self.havefinished_list = []  # topic ids already processed
        self.waitting_list = []  # topic ids still waiting to be processed
        self.record_topic_data = dict()  # topic id -> topic name
        self.record_topic_link_data = dict()  # topic id -> list of parent topic ids

    def setRootTopic(self, root_topic_id='19778317'):
        # Seed the crawl queue with a root topic id.
        self.waitting_list.append(root_topic_id)

    def relogin(self):
        """Re-login after a dropped connection and refresh the session state."""
        self.login.login()
        self.headers = self.login.getHeaders()
        self.data = self.login.getData()
        self.session = self.login.getSession()

    def getLinkTopic(self, link_url='https://www.zhihu.com/topic/19778317/organize/entire?parent=19778317'):
        """Fetch one "organize/entire" page and extract its topic data.

        :param link_url: topic organize/entire endpoint URL
        :return: dict with parent_topic_id, parent_topic_name,
                 sub_topics_id and sub_topics_name
        """
        # self.session
        try:
            res = self.session.post(link_url, data=self.data, headers=self.headers)
            time.sleep(random.randint(4, 12))  # randomized delay between requests
        except:
            # Any failure is treated as a dropped session: log in again and retry once.
            self.relogin()
            res = self.session.post(link_url, data=self.data, headers=self.headers)
            time.sleep(random.randint(4, 12))
        topic = json.loads(res.text)
        # topic = eval(res.text)
        cur_topic = topic['msg'][0]  # current (parent) topic record
        sub_topics = topic['msg'][1]  # list of child topic records
        # for test
        parent_topic_name = cur_topic[1].encode('utf8')
        parent_topic_id = cur_topic[2]
        sub_topics_name = []
        sub_topics_id = []
        for sub in sub_topics:
            sub_topics_id.append(sub[0][2])
            sub_topics_name.append(sub[0][1].encode('utf8'))
        result = dict()
        result["parent_topic_id"] = parent_topic_id
        result["parent_topic_name"] = parent_topic_name
        result["sub_topics_name"] = sub_topics_name
        result["sub_topics_id"] = sub_topics_id
        return result

    def recordData(self, result):
        """Record newly seen topics and child->parent links from one page result.

        :param result: dict returned by getLinkTopic
        :return: (existed, parent_topic_id, child_topic_id); existed is True
                 when a "load more" entry means paging must continue
        """
        sub_topics_id = result['sub_topics_id']
        sub_topics_name = result['sub_topics_name']
        parent_topic_name = result['parent_topic_name']
        parent_topic_id = result['parent_topic_id']
        existed = False
        child_topic_id = ''
        if parent_topic_id not in self.record_topic_data:
            self.record_topic_data[parent_topic_id] = parent_topic_name
        for sub_id, sub_name in zip(sub_topics_id, sub_topics_name):
            if sub_name == str("加载更多"):
                # "load more" placeholder: remember where the next page starts.
                existed = True
                child_topic_id = sub_id
                continue
            if sub_id not in self.record_topic_data:
                self.record_topic_data[sub_id] = sub_name
            if sub_id not in self.record_topic_link_data:
                self.record_topic_link_data[sub_id] = []
            self.record_topic_link_data[sub_id].append(parent_topic_id)
        return existed, parent_topic_id, child_topic_id

    def getSubTopic(self, parent_topic_id='19778317', child_topic_id=''):
        """Collect all direct children of one topic (one level only), following paging.

        :param parent_topic_id: id of the topic being expanded
        :param child_topic_id: paging cursor when the child list is truncated
        :return: list of child topic ids (may contain duplicates across pages)
        """
        state = True
        sub_topics_id = []
        while state:
            url_link = 'https://www.zhihu.com/topic/{0}/organize/entire'.format(parent_topic_id)
            if child_topic_id:
                url_link += "?child={}&parent={}".format(child_topic_id, parent_topic_id)
            result = self.getLinkTopic(url_link)
            sub_topics_id.extend(result['sub_topics_id'])
            state, parent_topic_id, child_topic_id = self.recordData(result)
        return sub_topics_id

    def getAllTopic(self):
        """Breadth-first crawl of every queued topic and its sub-topics,
        persisting results after each topic is finished."""
        while self.waitting_list:
            topic_id = self.waitting_list.pop(0)
            if topic_id in self.havefinished_list:
                continue
            sub_topics = self.getSubTopic(parent_topic_id=topic_id)
            self.waitting_list.extend((list(set(sub_topics))))
            self.havefinished_list.append(topic_id)
            print "当前一获取topic number:{}\t 当前已遍历 topic number: {}"\
                .format(len(self.havefinished_list)+len(self.waitting_list),len(self.havefinished_list))
            self.writeResulttoFile()

    def writeResulttoFile(self, topic_file=config.TopicFilePath+"zhihu_topic.json", topic_link_file=config.TopicFilePath+'zhihu_topic_link.json'):
        """Dump the topic-name map and topic-link map as UTF-8 JSON files."""
        with codecs.open(topic_file, 'w', encoding='utf8') as topic_fp:
            json.dump(self.record_topic_data, topic_fp, ensure_ascii=False, encoding='utf8')
        with codecs.open(topic_link_file, 'w', encoding='utf8') as topic_link_fp:
            json.dump(self.record_topic_link_data, topic_link_fp, ensure_ascii=False, encoding='utf8')
class SpiderTopicData(object):
    """Crawls the Zhihu topic hierarchy, recording topic names and child->parent links."""

    def __init__(self):
        """Log in and capture the session, headers and POST payload used for crawling."""
        self.login = Login()
        self.login.login()
        self.headers = self.login.getHeaders()
        self.data = self.login.getData()
        self.session = self.login.getSession()
        # self.session.encoding = 'utf8'
        # print self.session.
        # data link list
        self.havefinished_list = []  # topic ids already processed
        self.waitting_list = []  # topic ids still waiting to be processed
        self.record_topic_data = dict()  # topic id -> topic name
        self.record_topic_link_data = dict()  # topic id -> list of parent topic ids

    def setRootTopic(self, root_topic_id='19778317'):
        # Seed the crawl queue with a root topic id.
        self.waitting_list.append(root_topic_id)

    def relogin(self):
        """Re-login after a dropped connection and refresh the session state."""
        self.login.login()
        self.headers = self.login.getHeaders()
        self.data = self.login.getData()
        self.session = self.login.getSession()

    def getLinkTopic(
            self,
            link_url='https://www.zhihu.com/topic/19778317/organize/entire?parent=19778317'
    ):
        """Fetch one "organize/entire" page and extract its topic data.

        :param link_url: topic organize/entire endpoint URL
        :return: dict with parent_topic_id, parent_topic_name,
                 sub_topics_id and sub_topics_name
        """
        # self.session
        try:
            res = self.session.post(link_url,
                                    data=self.data,
                                    headers=self.headers)
            time.sleep(random.randint(4, 12))  # randomized delay between requests
        except:
            # Any failure is treated as a dropped session: log in again and retry once.
            self.relogin()
            res = self.session.post(link_url,
                                    data=self.data,
                                    headers=self.headers)
            time.sleep(random.randint(4, 12))
        topic = json.loads(res.text)
        # topic = eval(res.text)
        cur_topic = topic['msg'][0]  # current (parent) topic record
        sub_topics = topic['msg'][1]  # list of child topic records
        # for test
        parent_topic_name = cur_topic[1].encode('utf8')
        parent_topic_id = cur_topic[2]
        sub_topics_name = []
        sub_topics_id = []
        for sub in sub_topics:
            sub_topics_id.append(sub[0][2])
            sub_topics_name.append(sub[0][1].encode('utf8'))
        result = dict()
        result["parent_topic_id"] = parent_topic_id
        result["parent_topic_name"] = parent_topic_name
        result["sub_topics_name"] = sub_topics_name
        result["sub_topics_id"] = sub_topics_id
        return result

    def recordData(self, result):
        """Record newly seen topics and child->parent links from one page result.

        :param result: dict returned by getLinkTopic
        :return: (existed, parent_topic_id, child_topic_id); existed is True
                 when a "load more" entry means paging must continue
        """
        sub_topics_id = result['sub_topics_id']
        sub_topics_name = result['sub_topics_name']
        parent_topic_name = result['parent_topic_name']
        parent_topic_id = result['parent_topic_id']
        existed = False
        child_topic_id = ''
        if parent_topic_id not in self.record_topic_data:
            self.record_topic_data[parent_topic_id] = parent_topic_name
        for sub_id, sub_name in zip(sub_topics_id, sub_topics_name):
            if sub_name == str("加载更多"):
                # "load more" placeholder: remember where the next page starts.
                existed = True
                child_topic_id = sub_id
                continue
            if sub_id not in self.record_topic_data:
                self.record_topic_data[sub_id] = sub_name
            if sub_id not in self.record_topic_link_data:
                self.record_topic_link_data[sub_id] = []
            self.record_topic_link_data[sub_id].append(parent_topic_id)
        return existed, parent_topic_id, child_topic_id

    def getSubTopic(self, parent_topic_id='19778317', child_topic_id=''):
        """Collect all direct children of one topic (one level only), following paging.

        :param parent_topic_id: id of the topic being expanded
        :param child_topic_id: paging cursor when the child list is truncated
        :return: list of child topic ids (may contain duplicates across pages)
        """
        state = True
        sub_topics_id = []
        while state:
            url_link = 'https://www.zhihu.com/topic/{0}/organize/entire'.format(
                parent_topic_id)
            if child_topic_id:
                url_link += "?child={}&parent={}".format(
                    child_topic_id, parent_topic_id)
            result = self.getLinkTopic(url_link)
            sub_topics_id.extend(result['sub_topics_id'])
            state, parent_topic_id, child_topic_id = self.recordData(result)
        return sub_topics_id

    def getAllTopic(self):
        """Breadth-first crawl of every queued topic and its sub-topics,
        persisting results after each topic is finished."""
        while self.waitting_list:
            topic_id = self.waitting_list.pop(0)
            if topic_id in self.havefinished_list:
                continue
            sub_topics = self.getSubTopic(parent_topic_id=topic_id)
            self.waitting_list.extend((list(set(sub_topics))))
            self.havefinished_list.append(topic_id)
            print "当前一获取topic number:{}\t 当前已遍历 topic number: {}"\
                .format(len(self.havefinished_list)+len(self.waitting_list),len(self.havefinished_list))
            self.writeResulttoFile()

    def writeResulttoFile(self,
                          topic_file=config.TopicFilePath + "zhihu_topic.json",
                          topic_link_file=config.TopicFilePath + 'zhihu_topic_link.json'):
        """Dump the topic-name map and topic-link map as UTF-8 JSON files."""
        with codecs.open(topic_file, 'w', encoding='utf8') as topic_fp:
            json.dump(self.record_topic_data, topic_fp,
                      ensure_ascii=False, encoding='utf8')
        with codecs.open(topic_link_file, 'w', encoding='utf8') as topic_link_fp:
            json.dump(self.record_topic_link_data, topic_link_fp,
                      ensure_ascii=False, encoding='utf8')
# -*-coding: UTF-8-*- import time,re from HotTopic import HotTopic from Login import Login login = Login('yourUserName','yourPWD') login.login() #每天的话题 '''' topic = HotTopic(login.gsid) topic.hot_topic() ''' # '''''' #搜话题 topic = HotTopic(login.gsid) for eachline in file('key.txt','r'): print eachline print type(eachline) topic.linkProducer(eachline.strip())
from Coupon import Coupon
from Login import Login
from Book import Book

if __name__ == '__main__':
    # Read a cached "thor" auth token from the config file; if it is absent,
    # log in (QR-code scan) and cache the fresh token.
    # FIX: use a context manager so the file handle is closed on every path
    # (the original only closed it on the re-login branch).
    with open('config', 'r+') as config:
        thor = config.readline()
        if not thor or thor == '':
            # Log in to obtain thor (QR-code login)
            loginObj = Login()
            loginObj.login()
            thor = loginObj.get()
            config.write(thor)

    # Fast order by product id
    b = Book(thor)
    b.book('27752137726', 1)

    # Claim a coupon by its id
    c = Coupon(thor)
    c.getCoupon(
        'a89a85ebee8296dc65bd776f4a1d414e4e224464bb14bd65f5e822c44ff2854868dffebfbaa256891583cb0c75b53605'
    )