def update():
    """Log in, scrape gradebook data for every class, and persist the
    parsed assignments to disk."""
    driver = webdriver.Firefox()
    number_of_classes = 6
    login.log_in(driver)
    getdata.navigate_to_gradebook(driver)
    gradebook_data = getdata.get_gradebook_data(driver, number_of_classes)
    # enumerate() replaces gradebook_data.index(gradebook), which was O(n)
    # per iteration and returns the wrong index when two gradebooks
    # compare equal.
    for index, gradebook in enumerate(gradebook_data):
        assignments = parse.make_assignments(gradebook, index)
        parse.write_assignments(assignments)
def get_changes(): driver = webdriver.Firefox() number_of_classes = 6 login.log_in(driver) getdata.navigate_to_gradebook(driver) gradebook_data = getdata.get_gradebook_data(driver, number_of_classes) for gradebook in gradebook_data: for assignment in parse.gradebook_changes(gradebook, gradebook_data.index(gradebook) + 1): assignment.print_assignment() print "\n" print "\n --------------- \n"
def save_all_posts(self):  # save all the posts of the column
    """Save every post of the column, one .txt file per post, inside a
    directory named "<author_id>-<title>(column)", paging 100 posts per
    request.

    Raises:
        ValueError: if the column has no url.
    """
    if self.url is None:
        raise ValueError("Did not found url for the column")
    else:
        h = html2text.HTML2Text()
        post_num = self.get_post_count()
        user_session = login.log_in()
        # ceil(post_num / 100) pages (Python 2 integer division).
        for i in xrange((post_num - 1) / 100 + 1):
            url = self.url + "/posts" + "/?limit={0}&offset={1}".format(
                100, 100 * i)
            r = user_session.get(url)
            title = self.get_title()
            author_id = self.get_author_id()
            try:
                os.mkdir(
                    author_id.replace("/", "") + "-" + title + "(column)")
                os.chdir(
                    author_id.replace("/", "") + "-" + title + "(column)")
            except OSError:
                # Directory already exists; just enter it.
                os.chdir(
                    author_id.replace("/", "") + "-" + title + "(column)")
            for j in r.json():
                text_file = open(j["title"].replace("/", "") + ".txt", "w")
                text_file.write(j["title"] + "\n\n")
                text_file.write("Author: " + author_id + " Number of Like: " +
                                str(j["likesCount"]) + "\n\n")
                text_file.write(h.handle(j["content"]))
                text_file.write("Published time: " + j["publishedTime"] +
                                "\n\n")
                text_file.write("url is " + Zhihu + j["url"])
                text_file.close()
            # Step back out so the next page's mkdir/chdir stays relative
            # to the original working directory.
            os.chdir("..")
        return
def test_():
    # Scenario: enter full-screen mode on the "79 included in consolidation"
    # page, then submit an incorrect block number and expect a warning plus
    # a red status block (allure steps 8-9).
    driver = login.log_in()
    if f.driver_off(driver):
        return
    try:
        driver = f.open_79_included_in_consolidation_page(
            driver, pth.destination_1)
        if f.driver_off(driver):
            raise
    except:
        return
    try:
        full_screen_button = WebDriverWait(driver, 10).until(
            EC.visibility_of_element_located((
                By.XPATH,
                '//*[@id="root"]/section/section[2]/section/section/div[2]/div[3]/div[2]/div/div/div[1]/div/button/span[2]'
            )))
        full_screen_button.click()
        full_screen_form_heading = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located(
                (By.XPATH,
                 '/html/body/div[4]/div/div[2]/div/div[1]/h4/span')))
        # Heading must read "Full-screen mode" (Russian UI text).
        if full_screen_form_heading.text != 'Полноэкранный режим':
            raise
    except:
        f.close_driver(driver)
        with allure.step('step 8: '):
            assert 0, 'fail step 8'
    try:
        driver = f._79_included_in_consolidation_full_screen_enter_number(
            driver, pth.incorrect_79_block_num)
        if f.driver_off(driver):
            raise
        element_warning = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located(
                (By.XPATH, '/html/body/div[2]/div/div[1]/span')))
        if element_warning.text != 'Parameter HostName can not be empty':
            raise
        element_block = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located(
                (By.XPATH, '/html/body/div[4]/div/div[2]/div/div[2]/div')))
        # Red background signals the rejected (incorrect) block number.
        if element_block.get_attribute(
                'style') != 'background-color: rgb(194, 48, 48);':
            raise
    except:
        f.close_driver(driver)
        with allure.step('step 9: '):
            assert 0, 'fail step 9'
    time.sleep(wt)
    f.close_driver(driver)
def test_():
    # Scenario: navigate Menu -> Service -> Hosting management (step 3),
    # then open "Editing user groups" (step 4).
    driver = login.log_in()
    if f.driver_off(driver):
        return
    try:
        driver = f.click_menu_button(driver)
        if f.driver_off(driver):
            raise
        driver = f.click_service_button(driver)
        if f.driver_off(driver):
            raise
        driver = f.click_hosting_management_button(driver)
        if f.driver_off(driver):
            raise
    except:
        f.close_driver(driver)
        with allure.step('step 3: '):
            assert 0, 'fail step 3'
    try:
        driver = f.click_editing_user_groups_button(driver)
        if f.driver_off(driver):
            raise
    except:
        f.close_driver(driver)
        with allure.step('step 4: '):
            assert 0, 'fail step 4'
    time.sleep(wt)
    f.close_driver(driver)
def save_asks(self):  # save all the user's asks
    """Write every question the user has asked (title and url) into
    "<id> -Asks.txt", paging through /asks 20 entries at a time (Python 2)."""
    if self.url is None:
        print "Anonymous user, cannot save asks"
        return
    else:
        if self.user_session is None:
            self.user_session = login.log_in()
        asks_num = self.get_asks_num()
        if asks_num == 0:
            print "No asks"
            return
        # ceil(asks_num / 20) with Python 2 integer division.
        total_page = (asks_num - 1) / 20 + 1
        text_file = open(self.get_id().replace('/', '') + " -Asks.txt", "w")
        for page in xrange(total_page):
            if page == 0:
                asks_url = self.url + "/asks"
            else:
                asks_url = self.url + "/asks" + "?page={0}".format(page + 1)
            r = self.user_session.get(asks_url)
            soup = BeautifulSoup(r.content, "lxml")
            ask_soup = soup.find_all("h2", class_="zm-profile-question")
            for asks in ask_soup:
                asks_text = asks.a.text
                asks_url = Zhihu + asks.a["href"]
                text_file.write(asks_text)
                text_file.write(asks_url + "\n\n")
        text_file.close()
        return
def application_launcher():
    """Main menu loop of the quiz application: start a quiz, display
    scores, switch user, or exit (option 4 flushes scores to JSON)."""
    while True:
        print("""
----------------------------------------------------------
Welcome {} to your math online quiz
==========================================================
1 - Start a new test
2 - Display scores
3 - Use a different User
4 - Exit
-----------------------------------------------------------""".format(login.connected_user["username"]))
        user_choice = int(input("Please choose a number from the menu: "))
        if user_choice == 1:
            level = int(input("Please enter a level between 1 to 10: "))
            questions = quiz.generate_random_questions(number=5, level=level)
            score = quiz.quiz_maker(questions)
            print("Final score is {}".format(score))
            scoring.save_scoring(level=level, score=score, name=login.connected_user["username"])
        elif user_choice == 2:
            level = int(input("Please enter a level between 1 to 10: "))
            scoring.display_scoring(level)
        elif user_choice == 3:
            # NOTE(review): the login result is stored but never checked
            # here — presumably log_in() updates login.connected_user as a
            # side effect; confirm.
            isLogged = login.log_in()
        elif user_choice == 4:
            scoring.load_python_to_json()
            break;
def save_all_comments(self):  # save all comments of the answer
    """Save every comment of the answer (author, content, vote count) into
    "<url-derived name> comments.txt".

    Raises:
        ValueError: if the answer has no url.
    """
    if self.url is None:
        raise ValueError("Did not found url for the answer")
    else:
        if self.session is None:
            self.session = login.log_in()
        if self.soup is None:
            self.parser()
        answer_id = self.soup.find("div", class_="zm-item-answer ")["data-aid"]
        # The comment-box endpoint takes url-encoded JSON params:
        # {"answer_id": "<id>", "load_all": true}.
        comment_url_1 = "http://www.zhihu.com/node/AnswerCommentBoxV2?params=%7B%22answer_id%22%3A%22"
        comment_url_2 = "%22%2C%22load_all%22%3Atrue%7D"
        comment_url = comment_url_1 + answer_id + comment_url_2
        r = self.session.get(comment_url)
        soup = BeautifulSoup(r.content, "lxml")
        text_file = open(self.url[20:].replace("/", " ") + " comments.txt", "w")
        comment_list_raw = soup.find_all("div", class_="zm-item-comment")
        for comment_raw in comment_list_raw:
            like_num = int(comment_raw.find("span", class_="like-num").em.text)
            try:
                author = Zhihu + comment_raw.find("a", class_="zm-item-link-avatar")["href"]
                author_id = comment_raw.find("a", class_="zm-item-link-avatar")["title"]
            except TypeError:
                # Anonymous commenters have no avatar link (find() returns
                # None, so the subscription raises TypeError).
                author = "Anonymous user"
                author_id = "Anonymous user"
            content = comment_raw.find("div", class_="zm-comment-content").text
            text_file.write(author_id + " " + author)
            text_file.write(content)
            text_file.write("Number of vote: " + str(like_num) + "\n\n")
        text_file.close()
        return
def test_():
    """Smoke test: log in, wait, and shut the browser down if it is still
    alive."""
    browser = login.log_in()
    time.sleep(wt)
    if not f.driver_off(browser):
        f.close_driver(browser)
def get_topic_followed(self):  # get the list of urls of the topics that the user is following
    """Return the urls of all topics the user follows, paging the AJAX
    endpoint 20 topics at a time; [] when none, None for anonymous users."""
    if self.url is None:
        print "Anonymous user, cannot get topic followed"
        return
    else:
        if self.user_session is None:
            self.user_session = login.log_in()
        topics_followed_url = self.url + "/topics"
        topic_followed_num = self.get_topic_followed_num()
        if topic_followed_num == 0:
            return []
        r = self.user_session.get(topics_followed_url)
        soup = BeautifulSoup(r.content, "lxml")
        cookie = login.get_cookie()
        # CSRF token embedded in the page is required by the POST below.
        _xsrf = soup.find("input", attrs={'name': '_xsrf'})["value"]
        header = {
            'User-Agent': "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:34.0) Gecko/20100101 Firefox/34.0",
            'Host': "www.zhihu.com",
            'Referer': topics_followed_url
        }
        topic_followed_list = []
        # ceil(topic_followed_num / 20) pages (Python 2 integer division).
        for i in xrange((topic_followed_num - 1) / 20 + 1):
            data = {'_xsrf': _xsrf, 'start': 0, 'offset': 20 * i}
            response = self.user_session.post(topics_followed_url, data=data, headers=header, cookies=cookie)
            # msg[1] holds an HTML fragment containing this page's topics.
            topic_followed_raw = response.json()["msg"][1]
            main_soup = BeautifulSoup(topic_followed_raw, "lxml")
            topic_followed_raw = main_soup.find_all("div", class_="zm-profile-section-main")
            for topic in topic_followed_raw:
                topic = Zhihu + topic.a.next_sibling.next_sibling["href"]
                topic_followed_list.append(topic)
        return topic_followed_list
def save_questions_and_answers(self):  # save all the answers of the collection
    """Save every answer of the collection to files inside a directory
    named "<author_id>-<title>(collection)", paging through the
    collection until an empty page is found (at most 100 pages).

    Raises:
        ValueError: if the collection has no url.
    """
    if self.url is None:
        raise ValueError("Did not found url for the collection")
    else:
        new_session = login.log_in()
        for i in xrange(100):
            collection_url = self.url + "?page={0}".format(i + 1)
            r = new_session.get(collection_url)
            soup = BeautifulSoup(r.content, "lxml")
            items = soup.find_all("div", class_="zm-item")
            author = self.get_author()
            # BUG FIX: find_all() returns an empty list (never None) when
            # a page has no items, so the old "items is None" check could
            # never stop the loop; test for emptiness instead.
            if not items:
                break
            title = self.get_title()
            try:
                os.mkdir(user.User(author).get_id().replace("/", "") + "-" +
                         title + "(collection)")
                os.chdir(user.User(author).get_id().replace("/", "") + "-" +
                         title + "(collection)")
            except OSError:
                # Directory already exists; just enter it.
                os.chdir(user.User(author).get_id().replace("/", "") + "-" +
                         title + "(collection)")
            for item in items:
                try:
                    answer_url = Zhihu + item.find(
                        "a",
                        class_="answer-date-link last_updated meta-item")["href"]
                except TypeError:
                    # Answers never updated lack the "last_updated" class.
                    answer_url = Zhihu + item.find(
                        "a", class_="answer-date-link meta-item")["href"]
                answer.Answer(answer_url).save_answer_to_file()
            os.chdir("..")
        return
def save_all_followers_profile(self):  # save the profile of all the followers of the collection
    """Write "Url: ... ID: ..." lines for every follower of the collection
    into "<title> followers.txt(collection)", paging 10 followers per POST.

    Raises:
        ValueError: if the collection has no url, or a paging request fails.
    """
    if self.url is None:
        raise ValueError("Did not found url for the collection")
    else:
        self.parser()
        new_session = login.log_in()
        cookie = login.get_cookie()
        xsrf = cookie["_xsrf"]
        header = {
            "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:34.0) Gecko/20100101 Firefox/34.0",
            "Host": "www.zhihu.com",
            "Origin": "http://www.zhihu.com",
            "Referer": self.url + "/followers",
        }
        title = self.get_title()
        text_file = open(title.replace("/", "") + " followers.txt(collection)", "w")
        follower_num = self.get_follower_num()
        # BUG FIX: the page count must be ceil(follower_num / 10) ==
        # (follower_num - 1) / 10 + 1 (Python 2 integer division); without
        # the "+ 1" the last page — and for <= 10 followers, every
        # follower — was skipped. Sibling pagers in this project use + 1.
        for i in xrange((follower_num - 1) / 10 + 1):
            data = {"offset": 10 * i, "_xsrf": xsrf}
            r = new_session.post(self.url + "/followers", headers=header,
                                 data=data, cookies=cookie)
            if r.status_code != 200:
                raise ValueError("Error in retrieving collection's follower")
            soup = BeautifulSoup(r.text.decode("string_escape"), "lxml")
            soup = soup.find_all("a", class_="zg-link")
            for j in soup:
                follower_id = j["title"].decode("unicode-escape")
                follower_url = Zhihu + "/people/" + j["href"][32:]
                text_file.write("Url: " + follower_url + " ID: " + follower_id + "\n")
        text_file.close()
        return
def test_():
    # Step 5: enter a correct block number on the "71 arrival without sort"
    # page and expect the status widget to turn green.
    driver = login.log_in()
    if f.driver_off(driver):
        return
    try:
        driver = f.open_71_arrival_without_sort_page(driver)
        if f.driver_off(driver):
            raise
    except:
        return
    try:
        driver = f._71_included_in_consolidation_enter_number(
            driver, pth.correct_71_block_num)
        if f.driver_off(driver):
            raise
        time.sleep(3)
        background_colour = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((
                By.XPATH,
                '//*[@id="root"]/section/section[2]/section/section/div[2]/div[3]/div[2]/div/div'
            )))
        background_colour = background_colour.get_attribute('style')
        # Green background means the number was accepted.
        if background_colour != 'background-color: rgb(124, 184, 47);':
            raise
    except:
        f.close_driver(driver)
        with allure.step('step 5: '):
            assert 0, 'fail step 5'
    time.sleep(wt)
    f.close_driver(driver)
def save_all_posts(self):  # save all the posts of the column
    """Save every post of the column, one .txt file per post, inside a
    directory named "<author_id>-<title>(column)", paging 100 posts per
    request.

    Raises:
        ValueError: if the column has no url.
    """
    if self.url is None:
        raise ValueError("Did not found url for the column")
    else:
        h = html2text.HTML2Text()
        post_num = self.get_post_count()
        user_session = login.log_in()
        # ceil(post_num / 100) pages (Python 2 integer division).
        for i in xrange((post_num - 1) / 100 + 1):
            url = self.url + "/posts" + "/?limit={0}&offset={1}".format(100, 100 * i)
            r = user_session.get(url)
            title = self.get_title()
            author_id = self.get_author_id()
            try:
                os.mkdir(author_id.replace("/", "") + "-" + title + "(column)")
                os.chdir(author_id.replace("/", "") + "-" + title + "(column)")
            except OSError:
                # Directory already exists; just enter it.
                os.chdir(author_id.replace("/", "") + "-" + title + "(column)")
            for j in r.json():
                text_file = open(j["title"].replace("/", "") + ".txt", "w")
                text_file.write(j["title"] + "\n\n")
                text_file.write("Author: " + author_id + " Number of Like: " + str(j["likesCount"]) + "\n\n")
                text_file.write(h.handle(j["content"]))
                text_file.write("Published time: " + j["publishedTime"] + "\n\n")
                text_file.write("url is " + Zhihu + j["url"])
                text_file.close()
            # Step back out so the next page's mkdir/chdir stays relative
            # to the original working directory.
            os.chdir("..")
        return
def start():
    """Entry menu: connect as an existing user, create a new user, or exit.

    After a login or user creation the main application menu runs, and the
    user store is flushed back to JSON."""
    print("""
----------------------------------------------------------
Login online quiz
==========================================================
1 - Connect
2 - Create new user
3 - Exit
-----------------------------------------------------------""")
    user_choice = int(input("Please choose a number from the menu: "))
    while True:
        if user_choice == 1:
            isLogged = login.log_in()
            if (isLogged):
                application_launcher()
            login.load_python_to_json()
            break
        elif user_choice == 2:
            login.create_new_user()
            login.load_python_to_json()
            application_launcher()
            break
        elif user_choice == 3:
            break
        else:
            # Any other number: re-prompt and loop again.
            user_choice = int(input("Incorrect input.Please choose a number from the menu: "))
def test_():
    # Step 3: the exit button must lead back to the login form
    # (login and password fields plus an enter button must be present).
    driver = login.log_in()
    if f.driver_off(driver):
        return
    try:
        exit_button = WebDriverWait(driver, 10).until(
            EC.visibility_of_element_located((
                By.XPATH,
                '//*[@id="root"]/section/section[1]/div/div/div[2]/div/button[2]'
            )))
        exit_button.click()
        element_login = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.NAME, 'login')))
        element_password = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.NAME, 'password')))
        enter_button = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CLASS_NAME, 'css-1hnkt5t')))
    except:
        f.close_driver(driver)
        with allure.step('step 3: '):
            assert 0, 'fail step 3'
    time.sleep(wt)
    f.close_driver(driver)
def parser(self):
    """Fetch the collection page and cache its parsed soup on self.soup.

    Raises:
        ValueError: if the url does not answer with HTTP 200.
    """
    session = login.log_in()
    response = session.get(self.url)
    if response.status_code == 200:
        self.soup = BeautifulSoup(response.content, "lxml")
        return
    raise ValueError('"' + self.url + '"' + " : it isn't a collection url.")
def parser(self):
    """Fetch the answer page, keeping the session on self.session and the
    parsed soup on self.soup.

    Raises:
        ValueError: if the url does not answer with HTTP 200.
    """
    self.session = login.log_in()
    response = self.session.get(self.url)
    if response.status_code == 200:
        self.soup = BeautifulSoup(response.content, "lxml")
        return
    raise ValueError("\"" + self.url + "\"" + " : it isn't a answer url.")
def parser(self):
    """Fetch the column's JSON description and cache it on self.soup.

    Raises:
        ValueError: if the url does not answer with HTTP 200.
    """
    session = login.log_in()
    response = session.get(self.url)
    if response.status_code == 200:
        self.soup = response.json()
        return
    raise ValueError("\"" + self.url + "\"" + " : it isn't a column url.")
def get_course(id, passwd):
    """Retry logging in until we either get past the login page — success,
    returning (1, response, session) — or the failure is something other
    than a captcha error, returning (0, 0, 0)."""
    while True:
        response, session = log_in(id, passwd)
        landed_on_login = response.url == 'http://210.42.121.134/servlet/Login'
        if not landed_on_login:
            return 1, response, session
        # The failure text is "captcha error" in Chinese; only that case
        # is worth retrying.
        captcha_rejected = '验证码错误' in response.text
        if not captcha_rejected:
            return 0, 0, 0
def parser(self):  # parse the information for other functions to use
    """Fetch the user's profile page and cache the parsed soup on self.soup.

    Raises:
        ValueError: if the url does not answer with HTTP 200.
    """
    if self.url is None:
        print "Anonymous user, parser failed"
        return
    else:
        if self.user_session is None:
            self.user_session = login.log_in()
        r = self.user_session.get(self.url)
        if r.status_code != 200:
            raise ValueError("\"" + self.url + "\"" + " : it isn't a user url.")
        self.soup = BeautifulSoup(r.content, "lxml")
        return
def get_collections_url(self):
    """Return the urls of the user's collection pages, or the string
    "Anonymous user" when the user has no profile url."""
    if self.url is None:
        return "Anonymous user"
    if self.user_session is None:
        self.user_session = login.log_in()
    page = self.user_session.get(self.url + "/collections")
    soup = BeautifulSoup(page.content, "lxml")
    anchors = soup.find_all("a", class_="zm-profile-fav-item-title")
    return [Zhihu + anchor["href"] for anchor in anchors]
def get_columns_url(self):
    """Return the urls of the columns the user writes, or the string
    "Anonymous user" when the user has no profile url."""
    if self.url is None:
        return "Anonymous user"
    if self.user_session is None:
        self.user_session = login.log_in()
    page = self.user_session.get(self.url + "/posts")
    soup = BeautifulSoup(page.content, "lxml")
    anchors = soup.find_all("a", class_="avatar-link")
    return [anchor["href"] for anchor in anchors]
def test_():
    """Open the "79 included in consolidation" page for destination_1,
    wait, and close the browser; bail out silently on any failure."""
    browser = login.log_in()
    if f.driver_off(browser):
        return
    try:
        browser = f.open_79_included_in_consolidation_page(
            browser, pth.destination_1)
        if f.driver_off(browser):
            raise
    except:
        return
    time.sleep(wt)
    f.close_driver(browser)
def save_basic_info(self):  # save the basic information of the user
    """Write the user's profile basics (bio title, weibo link, location,
    business, education, self-description) into "<id> basic info.txt"."""
    if self.url is None:
        print "Anonymous user, cannot get basic"
        return
    else:
        h = html2text.HTML2Text()
        if self.user_session is None:
            self.user_session = login.log_in()
        basic_info_url = self.url + "/about"
        r = self.user_session.get(basic_info_url)
        soup = BeautifulSoup(r.content, "lxml")
        usr_id = self.get_id()
        text_file = open(usr_id.replace("/", "") + " basic info.txt", "w")
        # Every field is optional on the profile page, so each lookup is a
        # deliberate best-effort try/except with an "unspecified" fallback.
        try:
            title = soup.find("span", class_="bio")["title"]
        except:
            title = "unspecified"
        try:
            location = soup.find("span", class_="location item")["title"]
        except:
            location = "unspecified"
        try:
            business = soup.find("span", class_="business item")["title"]
        except:
            business = "unspecified"
        try:
            education = soup.find("span", class_="education item")["title"]
        except:
            education = "unspecified"
        try:
            content = h.handle(soup.find("span", class_="content").text)
        except:
            content = "unspecified"
        try:
            weibo = soup.find("a", class_="zm-profile-header-user-weibo")["href"]
        except:
            weibo = "unspecified"
        text_file.write("Url: " + self.url + "\n")
        text_file.write("Id: " + usr_id + "\n")
        text_file.write("Biography title: " + title + "\n")
        text_file.write("Sina weibo: " + weibo + "\n")
        text_file.write("location: " + location + "\n")
        text_file.write("Business: " + business + "\n")
        text_file.write("Education: " + education + "\n")
        text_file.write("Content: " + content + "\n")
        text_file.close()
        return
def save_all_followers_profile(self):  # save the profile of all followers of the column
    """Write "Url: ... ID: ..." lines for every follower of the column into
    "<title> followers.txt(column)", paging 100 followers per request.

    Raises:
        ValueError: if the column has no url.
    """
    if self.url is None:
        raise ValueError("Did not found url for the column")
    else:
        user_session = login.log_in()
        follower_num = self.get_follower_num()
        title = self.get_title()
        text_file = open(title.replace("/", "") + " followers.txt(column)", "w")
        # BUG FIX: the page count must be ceil(follower_num / 100) ==
        # (follower_num - 1) / 100 + 1 (Python 2 integer division);
        # without the "+ 1" the last page — and for <= 100 followers,
        # every follower — was skipped.
        for i in xrange((follower_num - 1) / 100 + 1):
            post_url = self.url + "/followers?limit={0}&offset={1}".format(100, i * 100)
            r = user_session.get(post_url)
            # Parse the JSON body once per page instead of once per entry.
            followers = r.json()
            for j in xrange(len(followers)):
                text_file.write("Url: " + followers[j]["profileUrl"] +
                                " ID: " + followers[j]["name"] + "\n")
        text_file.close()
        return
def test_():
    """Step 3: open the "71 arrival without sort" page; report a failure
    through the allure step, otherwise wait and close the browser."""
    browser = login.log_in()
    if f.driver_off(browser):
        return
    try:
        browser = f.open_71_arrival_without_sort_page(browser)
        if f.driver_off(browser):
            raise
    except:
        f.close_driver(browser)
        with allure.step('step 3: '):
            assert 0, 'fail step 3'
    time.sleep(wt)
    f.close_driver(browser)
def save_answers(self):  # save all the user's answers
    """Save every answer the user wrote, one .txt file per answer, inside
    a "<id>-Answers" directory; each file holds the question url/title,
    author, vote count, and the answer text converted to markdown."""
    if self.url is None:
        print "Anonymous user, cannot save answers"
        return
    else:
        h = html2text.HTML2Text()
        usr_id = self.get_id()
        answers_num = self.get_answers_num()
        if answers_num == 0:
            print "No answer"
            return
        new_session = login.log_in()
        # ceil(answers_num / 20) pages (Python 2 integer division).
        total_page = (answers_num - 1) / 20 + 1
        try:
            os.mkdir(usr_id.replace("/", "") + "-Answers")
            os.chdir(usr_id.replace("/", "") + "-Answers")
        except OSError:
            # Directory already exists; just enter it.
            os.chdir(usr_id.replace("/", "") + "-Answers")
        for page in xrange(total_page):
            if page == 0:
                answers_url = self.url + "/answers"
            else:
                answers_url = self.url + "/answers" + "?page={0}".format(page + 1)
            r = new_session.get(answers_url)
            soup = BeautifulSoup(r.content, "lxml")
            soup = soup.find("div", attrs={"id": "zh-profile-answer-list"})
            # The three lists below are parallel: one entry per answer.
            answer_text = soup.find_all("div", class_="zm-item-rich-text")
            vote_num = soup.find_all("a", class_="zm-item-vote-count")
            question_url = soup.find_all("a", class_="question_link")
            for i in xrange(len(answer_text)):
                text_file = open(question_url[i].text.replace('/', '') + ".txt", "w")
                text_file.write(Zhihu + question_url[i]["href"] + "\n\n")
                text_file.write(question_url[i].text + "\n\n")
                text_file.write("Author is : ")
                text_file.write(usr_id + " ")
                text_file.write("Number of vote is:")
                text_file.write(vote_num[i]["data-votecount"] + "\n\n")
                text_file.write(h.handle(answer_text[i].textarea.text))
                text_file.close()
        os.chdir("..")
        return
def save_all_followers_profile(self):  # save the profile of all followers of the column
    """Write "Url: ... ID: ..." lines for every follower of the column into
    "<title> followers.txt(column)", paging 100 followers per request.

    Raises:
        ValueError: if the column has no url.
    """
    if self.url is None:
        raise ValueError("Did not found url for the column")
    else:
        user_session = login.log_in()
        follower_num = self.get_follower_num()
        title = self.get_title()
        text_file = open(
            title.replace("/", "") + " followers.txt(column)", "w")
        # BUG FIX: the page count must be ceil(follower_num / 100) ==
        # (follower_num - 1) / 100 + 1 (Python 2 integer division);
        # without the "+ 1" the last page — and for <= 100 followers,
        # every follower — was skipped.
        for i in xrange((follower_num - 1) / 100 + 1):
            post_url = self.url + "/followers?limit={0}&offset={1}".format(
                100, i * 100)
            r = user_session.get(post_url)
            # Parse the JSON body once per page instead of once per entry.
            followers = r.json()
            for j in xrange(len(followers)):
                text_file.write("Url: " + followers[j]["profileUrl"] +
                                " ID: " + followers[j]["name"] + "\n")
        text_file.close()
        return
def save_all_followers_profile(self):  # save the profile of all the followers of the collection
    """Write "Url: ... ID: ..." lines for every follower of the collection
    into "<title> followers.txt(collection)", paging 10 followers per POST.

    Raises:
        ValueError: if the collection has no url, or a paging request fails.
    """
    if self.url is None:
        raise ValueError("Did not found url for the collection")
    else:
        self.parser()
        new_session = login.log_in()
        cookie = login.get_cookie()
        xsrf = cookie["_xsrf"]
        header = {
            'User-Agent': "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:34.0) Gecko/20100101 Firefox/34.0",
            'Host': "www.zhihu.com",
            'Origin': "http://www.zhihu.com",
            'Referer': self.url + "/followers",
        }
        title = self.get_title()
        text_file = open(
            title.replace("/", "") + " followers.txt(collection)", "w")
        follower_num = self.get_follower_num()
        # BUG FIX: the page count must be ceil(follower_num / 10) ==
        # (follower_num - 1) / 10 + 1 (Python 2 integer division); without
        # the "+ 1" the last page — and for <= 10 followers, every
        # follower — was skipped. Sibling pagers in this project use + 1.
        for i in xrange((follower_num - 1) / 10 + 1):
            data = {'offset': 10 * i, '_xsrf': xsrf}
            r = new_session.post(self.url + "/followers",
                                 headers=header,
                                 data=data,
                                 cookies=cookie)
            if r.status_code != 200:
                raise ValueError(
                    "Error in retrieving collection's follower")
            soup = BeautifulSoup(r.text.decode('string_escape'), "lxml")
            soup = soup.find_all("a", class_="zg-link")
            for j in soup:
                follower_id = j["title"].decode('unicode-escape')
                follower_url = Zhihu + "/people/" + j["href"][32:]
                text_file.write("Url: " + follower_url + " ID: " +
                                follower_id + "\n")
        text_file.close()
        return
def save_questions_and_answers(self):  # save all the answers of the collection
    """Save every answer of the collection to files inside a directory
    named "<author_id>-<title>(collection)", paging through the
    collection until an empty page is found (at most 100 pages).

    Raises:
        ValueError: if the collection has no url.
    """
    if self.url is None:
        raise ValueError("Did not found url for the collection")
    else:
        new_session = login.log_in()
        for i in xrange(100):
            collection_url = self.url + "?page={0}".format(i + 1)
            r = new_session.get(collection_url)
            soup = BeautifulSoup(r.content, "lxml")
            items = soup.find_all("div", class_="zm-item")
            author = self.get_author()
            # BUG FIX: find_all() returns an empty list (never None) when
            # a page has no items, so the old "items is None" check could
            # never stop the loop; test for emptiness instead.
            if not items:
                break
            title = self.get_title()
            try:
                os.mkdir(
                    user.User(author).get_id().replace("/", "") + "-" +
                    title + "(collection)")
                os.chdir(
                    user.User(author).get_id().replace("/", "") + "-" +
                    title + "(collection)")
            except OSError:
                # Directory already exists; just enter it.
                os.chdir(
                    user.User(author).get_id().replace("/", "") + "-" +
                    title + "(collection)")
            for item in items:
                try:
                    answer_url = Zhihu + item.find(
                        "a",
                        class_="answer-date-link last_updated meta-item"
                    )["href"]
                except TypeError:
                    # Answers never updated lack the "last_updated" class.
                    answer_url = Zhihu + item.find(
                        "a", class_="answer-date-link meta-item")["href"]
                answer.Answer(answer_url).save_answer_to_file()
            os.chdir("..")
        return
def get_column_followed(self):  # get the list of urls of the columns that the user is following
    """Return the urls of all columns the user follows, paging the AJAX
    endpoint 20 columns at a time; [] when none, None for anonymous users."""
    if self.url is None:
        print "Anonymous user, cannot get column followed"
        return
    else:
        if self.user_session is None:
            self.user_session = login.log_in()
        column_followed_url = self.url + "/columns/followed"
        column_followed_num = self.get_column_followed_num()
        if column_followed_num == 0:
            return []
        r = self.user_session.get(column_followed_url)
        soup = BeautifulSoup(r.content, "lxml")
        # print soup
        cookie = login.get_cookie()
        # Both the CSRF token and the "params" payload embedded in the list
        # container are required by the paging endpoint below.
        _xsrf = soup.find("input", attrs={'name': '_xsrf'})["value"]
        soup1 = soup.find("div", class_="zh-general-list clearfix")
        string = soup1['data-init']
        params = literal_eval(string)['params']
        post_url = "http://www.zhihu.com/node/ProfileFollowedColumnsListV2"
        header = {
            'User-Agent': "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:34.0) Gecko/20100101 Firefox/34.0",
            'Host': "www.zhihu.com",
            'Referer': column_followed_url
        }
        column_followed_list = []
        # ceil(column_followed_num / 20) pages (Python 2 integer division).
        for i in xrange((column_followed_num - 1) / 20 + 1):
            params['offset'] = i * 20
            data = {'_xsrf': _xsrf, 'method': "next", 'params': json.dumps(params)}
            response = self.user_session.post(post_url, data=data, headers=header, cookies=cookie)
            # "msg" is a list of HTML fragments, one per column.
            column_followed_list_raw = response.json()["msg"]
            for column_followed_raw in column_followed_list_raw:
                main_soup = BeautifulSoup(column_followed_raw, "lxml")
                column_followed = main_soup.find("div", class_="zm-profile-section-main").a["href"]
                column_followed_list.append(column_followed)
        return column_followed_list
def save_top_answers(self):  # save the top answers of the topic
    """Save every answer found on the topic's /top-answers pages into a
    directory named "<title> top answers(topic)".

    Raises:
        ValueError: if the topic has no url.
    """
    if self.url is None:
        raise ValueError("Did not found url for the topic")
    else:
        new_session = login.log_in()
        title = self.get_title()
        try:
            os.mkdir(title.replace("/", "") + " top answers(topic)")
            os.chdir(title.replace("/", "") + " top answers(topic)")
        except OSError:
            # Directory already exists; just enter it.
            os.chdir(title.replace("/", "") + " top answers(topic)")
        # At most 50 pages are attempted; a non-200 response ends earlier.
        for i in xrange(50):
            top_answer_url = self.url + "/top-answers?page={0}".format(i + 1)
            r = new_session.get(top_answer_url)
            if r.status_code != 200:
                break
            soup = BeautifulSoup(r.content, "lxml")
            answer_links_raw = soup.find_all("span", class_="answer-date-link-wrap")
            for answer_link_raw in answer_links_raw:
                answer_link_raw = Zhihu + answer_link_raw.a["href"]
                answer.Answer(answer_link_raw).save_answer_to_file()
        os.chdir("..")
        return
def testSave(driver, url):
    # Log in, pull gradebook data for every class, and save the parsed
    # assignments.
    # NOTE(review): number_of_classes is not defined in this function —
    # presumably a module-level constant; confirm before running.
    login.log_in(driver, url)
    gradebook_data = getdata.get_gradebook_data(driver, number_of_classes)
    parse.save_assignments(gradebook_data)
    # create(driver, url)
# -*- coding: UTF-8 -*- import login import urllib2 import sys reload(sys) sys.setdefaultencoding('utf-8') username = raw_input(u'请输入用户名:') password = raw_input(u'请输入密码:') mylog = login.log_in(username, password) mylog.login() # 测试登录成功,云飞雪逸的主页URL myUrl = 'http://weibo.com/u/3205309050?from=page_100505_profile&wvr=6&mod=like&is_all=1' res = urllib2.urlopen(myUrl) data = res.read() res.close() data = data.decode('utf-8') print data f = file('weibo.txt', 'a') f.write(data) f.close()
def save_all_followers_profile(self):  # save the profile of all followers of the question
    """Dump a profile row (url, id, follower/ask/answer/agree counts, and
    a naive robot flag) for every follower of the question into an .xls
    sheet named "Follower_profile".

    Raises:
        ValueError: if the question has no url.
    """
    if self.url is None:
        raise ValueError("Did not found url for the question")
    else:
        if self.session is None:
            self.session = login.log_in()
        url = self.url + "/followers"
        follower_num = self.get_follower_num()
        r = self.session.get(url)
        soup = BeautifulSoup(r.content, "lxml")
        # CSRF token required by the paging POST below.
        _xsrf = soup.find("input", attrs={'name': '_xsrf'})["value"]
        header = {
            'User-Agent': "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:34.0) Gecko/20100101 Firefox/34.0",
            'Host': "www.zhihu.com",
            'Referer': url
        }
        book = xlwt.Workbook(encoding="utf-8")
        new_sheet = book.add_sheet("Follower_profile")
        new_sheet.write(0, 0, "url")
        new_sheet.write(0, 1, "id")
        new_sheet.write(0, 2, "follower_num")
        new_sheet.write(0, 3, "ask_num")
        new_sheet.write(0, 4, "answer_num")
        new_sheet.write(0, 5, "agree_num")
        new_sheet.write(0, 6, "is_robot")
        cookie = login.get_cookie()
        row = 1
        # ceil(follower_num / 20) pages (Python 2 integer division).
        for i in xrange((follower_num-1)/20 + 1):
            data = {"offset": 20*i, "start": 0, "_xsrf": _xsrf}
            r1 = self.session.post(url, headers=header, data=data, cookies=cookie)
            # msg[1] holds an HTML fragment with this page's follower cards.
            temp_soup = BeautifulSoup(r1.json()["msg"][1], "lxml")
            user_list_raw = temp_soup.find_all("div", class_="zm-profile-card zm-profile-section-item zg-clear no-hovercard")
            for j in user_list_raw:
                try:
                    user_url = j.h2.a["href"]
                    new_sheet.write(row, 0, user_url)
                    user_id = j.find("a", class_="zm-item-link-avatar")["title"]
                    new_sheet.write(row, 1, user_id)
                    sub_soup = j.find_all("a", class_="zg-link-gray-normal")
                    # Counts may be non-numeric text; keep the raw string
                    # in that case.
                    try:
                        user_follower = int(sub_soup[0].text.split()[0])
                    except:
                        user_follower = sub_soup[0].text.split()[0]
                    new_sheet.write(row, 2, user_follower)
                    user_asks = int(sub_soup[1].text.split()[0])
                    new_sheet.write(row, 3, user_asks)
                    try:
                        user_answers = int(sub_soup[2].text.split()[0])
                    except:
                        user_answers = sub_soup[2].text.split()[0]
                    new_sheet.write(row, 4, user_answers)
                    try:
                        user_agrees = int(sub_soup[3].text.split()[0])
                    except:
                        user_agrees = sub_soup[3].text.split()[0]
                    new_sheet.write(row, 5, user_agrees)
                    # Heuristic robot flag: near-zero activity in every count.
                    if user_follower < 2 and user_asks < 1 and user_answers < 2 and user_agrees < 3:
                        is_robot = 1
                    else:
                        is_robot = 0
                    new_sheet.write(row, 6, is_robot)
                except:
                    # Anonymous followers have no profile link.
                    user_url = "Anonymous user"
                    new_sheet.write(row, 0, user_url)
                row += 1
        book.save(self.get_title().replace("/", "") + " followers profile(question).xls")
        return
def save_all_voters_profile(self):  # save the profile of all voters of the answer
    """Dump a profile row (url, id, agree/thanks/ask/answer counts, robot
    flag) for every upvoter of the answer into an .xls sheet, and return
    the fraction of votes attributed to suspected robots.

    Raises:
        ValueError: if the answer has no url.
    """
    if self.url is None:
        raise ValueError("Did not found url for the answer")
    else:
        if self.session is None:
            self.session = login.log_in()
        if self.soup is None:
            self.parser()
        answer_id = self.soup.find("div", class_="zm-item-answer ")["data-aid"]
        voters_profile_url = Zhihu + "/answer/" + answer_id + "/voters_profile"
        cookie = login.get_cookie()
        header = {
            'User-Agent': "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:34.0) Gecko/20100101 Firefox/34.0",
            'Host': "www.zhihu.com",
            'Referer': self.url
        }
        book = xlwt.Workbook(encoding="utf-8")
        new_sheet = book.add_sheet("Voter_profile")
        new_sheet.write(0, 0, "url")
        new_sheet.write(0, 1, "id")
        new_sheet.write(0, 2, "agree_num")
        new_sheet.write(0, 3, "thanks_num")
        new_sheet.write(0, 4, "ask_num")
        new_sheet.write(0, 5, "answer_num")
        new_sheet.write(0, 6, "is_robot")
        total_vote = self.get_vote_num()
        row = 1
        robot_vote = 0
        # Voters come 10 per page: ceil(total_vote / 10) requests
        # (Python 2 integer division).
        for i in xrange((total_vote - 1) / 10 + 1):
            data = {"total": total_vote, "offset": 10 * i}
            r = self.session.get(voters_profile_url, headers=header, data=data, cookies=cookie)
            for j in r.json()["payload"]:
                # Progress log every 100 rows.
                if row % 100 == 0 and row != 0:
                    print "Have saved {0} voter profiles".format(row)
                soup = BeautifulSoup(j, "lxml")
                try:
                    voter_url = soup.find("div", class_="author ellipsis").a["href"]
                except:
                    # Anonymous voters have no author link.
                    voter_url = "Anonymous user"
                new_sheet.write(row, 0, voter_url)
                if voter_url != "Anonymous user":
                    voter_id = soup.find("div", class_="author ellipsis").a["title"]
                    new_sheet.write(row, 1, voter_id)
                    # Counts can be abbreviated text; keep the raw string
                    # when int() fails.
                    try:
                        voter_agree_num = int(soup.find("ul", class_="status").li.span.text.split()[0])
                    except ValueError:
                        voter_agree_num = soup.find("ul", class_="status").li.span.text.split()[0]
                    new_sheet.write(row, 2, voter_agree_num)
                    try:
                        voter_thanks_num = int(
                            soup.find("ul", class_="status").li.next_sibling.next_sibling.span.text.split()[0])
                    except ValueError:
                        voter_thanks_num = soup.find("ul", class_="status").li.next_sibling.next_sibling.span.text.split()[0]
                    new_sheet.write(row, 3, voter_thanks_num)
                    voter_ask_num = int(soup.find_all("li", class_="hidden-phone")[0].a.text.split()[0])
                    new_sheet.write(row, 4, voter_ask_num)
                    voter_answer_num = int(soup.find_all("li", class_="hidden-phone")[1].a.text.split()[0])
                    new_sheet.write(row, 5, voter_answer_num)
                    # Heuristic robot flag: near-zero activity everywhere.
                    if voter_agree_num < 1 and voter_thanks_num < 1 and voter_ask_num < 1 and voter_answer_num < 2:
                        voter_is_robot = 1
                        robot_vote += 1
                    else:
                        voter_is_robot = 0
                    new_sheet.write(row, 6, voter_is_robot)
                row += 1
        book.save(self.url[20:].replace("/", " ") + " voter profile(answer).xls")
        # "* 1.0" forces float division under Python 2.
        return robot_vote / (total_vote * 1.0)
def test_():
    """UI test: create a user group, verify it appears, then delete it.

    Steps 3-8 each run in their own try/except; on any failure the
    driver is closed and the step is reported to allure as failed.
    """
    driver = login.log_in()
    if f.driver_off(driver):
        return
    # step 3: navigate Menu -> Service -> Hosting management.
    # BUGFIX (all steps): bare ``except:`` narrowed to ``except
    # Exception:`` so Ctrl-C / SystemExit can still abort the test run.
    try:
        driver = f.click_menu_button(driver)
        if f.driver_off(driver):
            raise
        driver = f.click_service_button(driver)
        if f.driver_off(driver):
            raise
        driver = f.click_hosting_management_button(driver)
        if f.driver_off(driver):
            raise
    except Exception:
        f.close_driver(driver)
        with allure.step('step 3: '):
            assert 0, 'fail step 3'
    # step 4: open the user-group editing page.
    try:
        driver = f.click_editing_user_groups_button(driver)
        if f.driver_off(driver):
            raise
    except Exception:
        f.close_driver(driver)
        with allure.step('step 4: '):
            assert 0, 'fail step 4'
    # step 5: open the group-creation dialog.
    try:
        driver = f.click_creating_group_button(driver)
        if f.driver_off(driver):
            raise
    except Exception:
        f.close_driver(driver)
        with allure.step('step 5: '):
            assert 0, 'fail step 5'
    # step 6: enter a group name, save, and verify we are back on the
    # editing page (heading check).
    try:
        element_group_name = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((
                By.XPATH,
                '/html/body/div[3]/div/div[2]/div/div[2]/form/div[1]/div/div/input'
            )))
        element_group_name.send_keys(pth.random_name)
        saving_button = WebDriverWait(driver, 10).until(
            EC.visibility_of_element_located((
                By.XPATH,
                '/html/body/div[3]/div/div[2]/div/div[2]/form/div[2]/button[1]'
            )))
        saving_button.click()
        element_editing_user_groups_page_heading = WebDriverWait(
            driver, 10).until(
                EC.presence_of_element_located(
                    (By.XPATH,
                     '//*[@id="root"]/section/section[2]/section/div/h1')))
        if element_editing_user_groups_page_heading.text != 'Редактирование групп пользователей':
            raise
    except Exception:
        f.close_driver(driver)
        with allure.step('step 6: '):
            assert 0, 'fail step 6'
    # step 7: confirm the new group was created and is listed.
    try:
        if not f.check_group_created(driver, pth.random_name):
            raise
        if not f.check_group_existence(driver, pth.random_name):
            raise
    except Exception:
        f.close_driver(driver)
        with allure.step('step 7: '):
            assert 0, 'fail step 7'
    # step 8: select the group, delete it via the confirmation dialog,
    # and verify it no longer exists.
    try:
        marker_button = WebDriverWait(driver, 10).until(
            EC.visibility_of_element_located((
                By.XPATH,
                '//*[@id="root"]/section/section[2]/section/section/div/div/div[2]/div[1]/table/tbody/tr[1]/td[1]/label/span'
            )))
        marker_button.click()
        delete_group_button = WebDriverWait(driver, 10).until(
            EC.visibility_of_element_located((
                By.XPATH,
                '//*[@id="root"]/section/section[2]/section/section/div/div/div[1]/div[1]/button[4]'
            )))
        delete_group_button.click()
        element_deleting_group_heading = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located(
                (By.XPATH,
                 '/html/body/div[4]/div/div[2]/div/div[1]/h4/span')))
        if element_deleting_group_heading.text != 'Удаление группы пользователей':
            raise
        delete_button = WebDriverWait(driver, 10).until(
            EC.visibility_of_element_located(
                (By.XPATH,
                 '/html/body/div[4]/div/div[2]/div/div[3]/div/div/a[1]')))
        delete_button.click()
        if f.check_group_existence(driver, pth.random_name):
            raise
    except Exception:
        f.close_driver(driver)
        with allure.step('step 8: '):
            assert 0, 'fail step 8'
    time.sleep(wt)
    f.close_driver(driver)
def save_all_activity(self): # save all activities of the user if self.url is None: print "Anonymous user, cannot save all activity" return else: if self.soup is None: self.parser() usr_id = self.get_id() text_file = open(usr_id.replace("/", "") + " all activities.txt", "w") temp_soup = self.soup.find("div", class_="zm-profile-section-list profile-feed-wrap") activities = temp_soup.find_all("div", class_="zm-profile-section-main zm-profile-section-" "activity-main zm-profile-activity-page-item-main") times = temp_soup.find_all("span", class_="zm-profile-setion-time zg-gray zg-right") if len(times) != len(activities): raise ValueError("Bug in save_all_activities") for i in xrange(len(activities)): activity = activities[i] text_file.write(activity.text[:-1]) text_file.write(times[i].text + "\n\n") try: text_file.write("url is " + Zhihu + activity.a.next_sibling.next_sibling["href"] + "\n") except: text_file.write( "url is " + Zhihu + activity.a.next_sibling.next_sibling.next_sibling["href"] + "\n") if self.user_session is None: self.user_session = login.log_in() start_raw = self.soup.find_all("div", class_="zm-profile-section-item zm-item clearfix") try: start_raw[-1] except IndexError: print "No activity found" return start = start_raw[-1]["data-time"] _xsrf = self.soup.find("input", attrs={'name': '_xsrf'})["value"] data = {"start": start, "_xsrf": _xsrf} cookie = login.get_cookie() activities_url = self.url + "/activities" header = { 'User-Agent': "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:34.0) Gecko/20100101 Firefox/34.0", 'Host': "www.zhihu.com", 'Referer': self.url } info = 1 while True: if info % 10 == 0 and info != 0: print "Saved {0} pieces of activities".format(info * 10) r = self.user_session.post(activities_url, headers=header, data=data, cookies=cookie) if r.status_code != 200: break new_soup = BeautifulSoup(r.json()["msg"][1], "lxml") activities = new_soup.find_all("div", class_="zm-profile-section-main zm-profile-section" "-activity-main 
zm-profile-activity-page-item-main") times = new_soup.find_all("span", class_="zm-profile-setion-time zg-gray zg-right") if len(times) != len(activities): raise ValueError("Bug in save_all_activities") for i in xrange(len(activities)): activity = activities[i] text_file.write(activity.text[:-1]) text_file.write(times[i].text + "\n\n") try: if activity.a.next_sibling.next_sibling["href"][0:3] != "http": text_file.write("url is " + Zhihu + activity.a.next_sibling.next_sibling["href"] + "\n") else: text_file.write("url is " + activity.a.next_sibling.next_sibling["href"] + "\n") except: if activity.a.next_sibling.next_sibling.next_sibling["href"][0:3] != "http": text_file.write( "url is " + Zhihu + activity.a.next_sibling.next_sibling.next_sibling["href"] + "\n") else: text_file.write( "url is " + activity.a.next_sibling.next_sibling.next_sibling["href"] + "\n") try: start = new_soup.find_all("div", class_="zm-profile-section-item zm-item clearfix")[-1]["data-time"] except: break data["start"] = start info += 1 text_file.write("Approximately {0} pieces of activities".format(info * 10)) text_file.close() return
def testPrint(driver, url): login.log_in(driver, url) gradebook_data = getdata.get_gradebook_data(driver, number_of_classes) for gradebook in gradebook_data: print "\n" + "---------------------------------------" + "\n" + gradebook
def save_followees_profile(self): # save the profile of all the user's followees if self.url is None: print "Anonymous user, cannot save followees profile" return else: if self.user_session is None: self.user_session = login.log_in() followee_num = self.get_followee_num() if followee_num == 0: print "No followee" return followee_url = self.url + "/followees" cookie = login.get_cookie() r = self.user_session.get(followee_url) soup = BeautifulSoup(r.content, "lxml") _xsrf = soup.find("input", attrs={'name': '_xsrf'})["value"] soup1 = soup.find("div", class_="zh-general-list clearfix") string = soup1['data-init'] params = literal_eval(string)['params'] post_url = "http://www.zhihu.com/node/ProfileFolloweesListV2" header = { 'User-Agent': "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:34.0) Gecko/20100101 Firefox/34.0", 'Host': "www.zhihu.com", 'Referer': followee_url, } book = xlwt.Workbook(encoding="utf-8") new_sheet = book.add_sheet("FolloweeList") new_sheet.write(0, 0, "url") new_sheet.write(0, 1, "id") new_sheet.write(0, 2, "follower_num") new_sheet.write(0, 3, "asks_num") new_sheet.write(0, 4, "answers_num") new_sheet.write(0, 5, "agree_num") new_sheet.write(0, 6, "is_robot") row = 1 for i in xrange((followee_num - 1) / 20 + 1): if i % 100 == 0 and i != 0: print "Have recorded", i * 20, "followees" params['offset'] = i * 20 data = {'_xsrf': _xsrf, 'method': "next", 'params': json.dumps(params)} response = self.user_session.post(post_url, data=data, headers=header, cookies=cookie) followee_list = response.json()["msg"] for j in followee_list: main_soup = BeautifulSoup(j, "lxml") followees_url = main_soup.find("h2", class_="zm-list-content-title").a["href"] new_sheet.write(row, 0, followees_url) followees_id = main_soup.find("h2", class_="zm-list-content-title").a["title"] new_sheet.write(row, 1, followees_id) info_list = main_soup.find_all("a", class_="zg-link-gray-normal") follower_num = int(info_list[0].text.split()[0]) new_sheet.write(row, 2, follower_num) 
asks_num = int(info_list[1].text.split()[0]) new_sheet.write(row, 3, asks_num) answers_num = int(info_list[2].text.split()[0]) new_sheet.write(row, 4, answers_num) agree_num = int(info_list[3].text.split()[0]) new_sheet.write(row, 5, agree_num) if followee_num < 5 and asks_num < 2 and answers_num < 2 and agree_num < 2: is_robot = 1 else: is_robot = 0 new_sheet.write(row, 6, is_robot) row += 1 book.save(self.get_id() + " followee list.xls") return