def get_browser_params(driver):
    """Collect basic browser state for a test: current URL, window name, topbar.

    Also logs the page's datetime for debugging.
    Returns a (url, window_name, topbar) tuple.
    """
    url = driver.current_url
    window_name = cmn.get_window_name(driver)
    topbar = cmn.check_topbar(driver)
    # Fixed: Python 2 `print x` statement -> Python 3 print() call,
    # consistent with the rest of the file.
    print(cmn.get_datetime(driver))
    return url, window_name, topbar
def test_payoff_summary(dpp):
    # End-to-end check of the "payoff summary" panel: add a debt with random
    # parameters, verify the computed summary, then make one payment and
    # verify the recomputed summary.
    debt_name = "payoff summary check"
    starting_balance = randint(1001, 10000000)
    number_of_payments = randint(2, 15)
    # Minimum payment chosen so the balance clears in `number_of_payments` steps.
    minimum_payment = math.ceil(starting_balance / number_of_payments)
    apr = randint(3, 15)
    payoff_progress = 0
    base_page = page.BasePage(dpp)
    main_page = base_page.open_main_page_as_guest()
    main_page.add_debt_parametrized(debt_name, starting_balance, minimum_payment, apr)
    total_interest = common.get_total_interest(starting_balance, minimum_payment, number_of_payments, apr)
    current_balance = starting_balance - payoff_progress
    # First month's interest: monthly rate = APR% / 12.
    first_month_interest = round(current_balance * apr * 0.01 / 12, 2)
    date = common.get_datetime()
    # NOTE(review): the +1 month presumably accounts for the extra payment
    # created by accrued interest — confirm against the app's amortization.
    debt_free_on = common.add_months(date, number_of_payments + 1).strftime("%b %Y")
    total_of_payments = starting_balance + total_interest
    total_interest_percent = round((total_interest / total_of_payments) * 100, 1)
    main_page.check_payoff_summary(
        current_balance,
        starting_balance,
        minimum_payment,
        first_month_interest,
        debt_free_on,
        number_of_payments + 1,
        total_of_payments,
        total_interest,
        total_interest_percent,
    )
    # Add payment amount, then recompute everything for the reduced balance.
    main_page.add_payment_ammount(minimum_payment)
    current_balance = starting_balance - minimum_payment
    first_month_interest = round(current_balance * apr * 0.01 / 12, 2)
    date = common.get_datetime()
    debt_free_on = common.add_months(date, number_of_payments).strftime("%b %Y")
    total_interest = common.get_total_interest(current_balance, minimum_payment, number_of_payments - 1, apr)
    total_of_payments = current_balance + total_interest
    total_interest_percent = round((total_interest / total_of_payments) * 100, 1)
    main_page.check_payoff_summary(
        current_balance,
        starting_balance,
        minimum_payment,
        first_month_interest,
        debt_free_on,
        number_of_payments,
        total_of_payments,
        total_interest,
        total_interest_percent,
    )
def write_to_done_log(line, new_title, dir_path=None):
    """Append a downloaded link (and its title) to this month's done log.

    line: the source line/URL that was processed (trailing newline stripped).
    new_title: resolved title recorded next to the link.
    dir_path: directory holding the log; defaults to the current working
        directory at CALL time. (The old `dir_path=os.getcwd()` default was
        evaluated once at import time, which goes stale because this module
        calls os.chdir elsewhere.)
    """
    if dir_path is None:
        dir_path = os.getcwd()
    done_file_list = FileTool.get_appoint_file(
        'done-' + common.get_datetime('%Y-%m') + '.log', dir_path)
    if len(done_file_list) == 0:
        # os.path.join avoids the missing-separator bug of plain string concat.
        done_log = os.path.join(
            dir_path, 'done-' + common.get_datetime('%Y-%m') + '.log')
    else:
        done_log = done_file_list[0]
    logger.info("保存已下载图片链接--->>>done.log: " + done_log)
    with open(done_log, 'a+', encoding='utf-8') as f:
        f.write('%s:[%s,%s]\n' % (common.get_datetime('%Y/%m/%d %H:%M'),
                                  line.strip('\n'), new_title))
def save_not_down_url(dir_path, line, new_title, num):
    """Record a link that could not be downloaded into this month's un_done log."""
    log_basename = 'un_done-' + common.get_datetime('%Y-%m') + '.log'
    name_list = FileTool.get_appoint_file(log_basename, dir_path)
    # Reuse an existing log when one is found, otherwise create it inside
    # dir_path (dir_path is expected to carry a trailing separator).
    file_name = name_list[0] if name_list else dir_path + log_basename
    logger.info('un_down_file:' + file_name)
    with open(file_name, 'a+', encoding='utf-8') as f:
        f.write('%s:[%s,%s]\n'
                % (common.get_datetime('%Y/%m/%d %H:%M'), line, new_title))
def get_img_url_list(url):
    """Fetch `url` and collect candidate image/attachment links.

    Returns [links, page_title] on success, or [[url], exception] on failure
    so the caller can retry or record the original URL.
    """
    # Several post layouts are probed; hits are concatenated in order and
    # then de-duplicated.
    selectors = (
        "body div[id='wrap'] div[id='postlist'] div[id] table tr td[class='postcontent'] div[class='defaultpost'] table tr td img[file]",
        "body div[id='wrap'] div[id='postlist'] div[id] table tr td[class='postcontent'] div[class='defaultpost'] div div table tr td img[file]",
        "body div[id='wrap'] div[id='postlist'] div[id] table tr td[class='postcontent'] div[class='defaultpost'] div div div[class='postattachlist'] dl dd p img[file]",
        "body div[id='wrap'] div[id='postlist'] div[id] table tr td[class='postcontent'] div[class='defaultpost'] div div table tbody tr td a[href]",
    )
    try:
        soup = BeautySoupTool.BeautySoupTool(url)
        collected = []
        for css in selectors:
            collected.extend(soup.beautySoup.select(css))
        deduped = common.list_distinct(collected)
        return [deduped, soup.title]
    except Exception as e:
        logger.info("get_img_url_list 请求失败,{},{}".format(
            common.get_datetime('%Y/%m/%d %H:%M'), e))
        return [[url], e]
def future_dowm_img(img_url, proxy_ip, line_num, img_nums, img_num, down_path):
    """Download a single image into down_path unless it already exists.

    Retries the HTTP GET once after a 5s pause; a second failure propagates.
    Progress is logged with thread name, source line number and image index.
    (Function name kept as-is — the "dowm" typo is part of the public API.)
    """
    os.chdir(down_path)
    image_name = img_url.split("/")[-1]
    if not os.path.exists(image_name):
        try:
            get_request = requests.get(img_url, headers=common.header,
                                       proxies=proxy_ip, timeout=10)
        except Exception as e:
            logger.info("请求失败,请求时间是:{}".format(
                common.get_datetime('%Y/%m/%d %H:%M')))
            logger.info('失败原因:%s' % e)
            time.sleep(5)
            # Single retry after a pause.
            get_request = requests.get(img_url, headers=common.header,
                                       proxies=proxy_ip, timeout=10)
        image = get_request.content
        # Fixed: the old code copied the whole payload through
        # io.BytesIO(image).read() only to measure it; len(image) gives the
        # same byte count without the extra allocation.
        size_kb = len(image) / 1000
        logger.info('%s start down 第 %i 行:第 %i / %i 个 : %s ;size:%s kb'
                    % (threading.current_thread().name, line_num, img_num + 1,
                       img_nums, img_url, size_kb))
        if len(image) > 0:
            os.chdir(down_path)
            with open(image_name, 'wb') as f:
                f.write(image)
def write_to_text(params):
    """Scan listing pages and save new article URLs into batched txt files.

    params keys: start_page / end_page (page range), select_str (CSS selector
    for item links), down_url (page URL template with one %i placeholder).
    URLs already recorded in the done file are skipped.
    """
    temp = 0
    start_page = params['start_page']
    end_page = params['end_page']
    select_str = params['select_str']
    down_url = params['down_url']
    done_down_text = common.get_file_name_list(os.getcwd(), "text")[0]
    print(done_down_text)
    with open(done_down_text, 'a+') as fileObj:
        # Fixed: 'a+' opens positioned at EOF, so the old read() always
        # returned '' and the dedup list was empty. Rewind before reading
        # ('a+' is kept so the file is still created when missing).
        fileObj.seek(0)
        readLines = fileObj.read().splitlines()
    for i in range(start_page, end_page):
        soup = common.get_beauty_soup(down_url % i)
        itemUrl = soup.select(select_str)
        for j in range(0, len(itemUrl)):
            fileUrl = itemUrl[j].get('href')
            # NOTE(review): this print raises TypeError if href is missing —
            # the None check below comes too late; kept as-is for parity.
            print('fileUrl:' + fileUrl)
            if fileUrl is not None and fileUrl.find('.html') >= 0 and fileUrl not in readLines:
                os.chdir(cur_dir)
                # Record the link as seen so later pages don't repeat it.
                with open(done_down_text, 'a+') as ff:
                    ff.write(fileUrl + '\n')
                fileUrl = pre_url + fileUrl
                temp += 1
                print("fileUrl:" + fileUrl)
                # Batch output: a new txt file every 500 links.
                with open(common.get_datetime('%Y-%m-%d') + '_'
                          + str(temp // 500) + '.txt', 'a+') as f:
                    f.write(fileUrl + '\n')
def __init__(self):
    """Populate current/previous month strings ('%Y-%m'), once per process."""
    if DateTool.__init_flag:
        return
    self.cur_month = common.get_datetime('%Y-%m')
    # Previous month = the day before the 1st of the current month.
    month_first_day = datetime.date.today().replace(day=1)
    self.pre_month = (month_first_day - datetime.timedelta(days=1)).strftime("%Y-%m")
    DateTool.__init_flag = True
def save_url_down(done_down_text, file_down_url, pic_href, num, title):
    """Append a download URL to a batched txt file and mark pic_href as done."""
    # A new batch file is started every 500 entries (num // 500).
    batch_file = '%s-%s_%i.txt' % (title, common.get_datetime('%Y-%m-%d'),
                                   num // 500)
    with open(batch_file, 'a+') as batch:
        batch.write(file_down_url + '\n')
    # Record the page link so it is not downloaded again.
    with open(done_down_text, 'a+') as done:
        done.write(pic_href + '\n')
def create_down_root_path(category_name, title):
    """Build (and create if missing) the download directory for one post.

    Layout: <user_dir>/Pictures/Camera Roll/P**N/<category>/<YYYY-MM>/<title>/
    Returns the path with a trailing separator, as callers expect.
    """
    root_path = os.path.join(
        user_dir, 'Pictures', 'Camera Roll', 'P**N',
        category_name, common.get_datetime('%Y-%m'),
        str(title.strip())) + os.sep
    # exist_ok avoids the check-then-create race of the old exists()/makedirs
    # pair and replaces the brittle %-based path concatenation.
    os.makedirs(root_path, exist_ok=True)
    return root_path
def get_image_url(qid, next_page):
    # Walk the Zhihu answers API for question `qid` page by page, collecting
    # image URLs from each answer. New URLs go into batched txt files; seen
    # image names are tracked in a per-question done file plus the global
    # doneDown.text. (A sample `next_page` API URL was inlined here originally.)
    done_down_path = done_down_file_path % qid
    if not os.path.exists(done_down_path):
        # Create an empty per-question done file on first run.
        with open(done_down_path, 'w+') as cf:
            readLines = cf.read().splitlines()
        print('file not exist ;create file :%s' % done_down_path)
    else:
        with open(done_down_path, 'r') as file_obj:
            readLines = file_obj.read().splitlines()
    # Merge in the global done list so both sources suppress re-downloads.
    with open(parent_dir + '/doneDown.text') as obj:
        readLinesCopy = obj.read().splitlines()
    readLines.extend(readLinesCopy)
    while True:
        post = requests.get(next_page, headers=headers)
        loads = json.loads(post.text)
        if loads.get('error'):
            print(loads.get('error'))
            return
        data = loads.get('data')
        if len(data) == 0:
            return
        else:
            print('总回答数量:%s' % loads.get('paging').get('totals'))
            # Extract image links from this page of answers.
            print('当前页回答数量:%i' % len(data))
            temp = 0
            for i, item in enumerate(data):
                content = pq(item['content'])
                imgUrls = content.find('noscript img').items()
                for imgTag in imgUrls:
                    src = imgTag.attr("src")
                    # Image file name without the query string.
                    img_name = os.path.basename(src).split('?')[0]
                    temp += 1
                    os.chdir(cur_dir)
                    if img_name not in readLines:
                        # Save the download link into a batch file (500/file).
                        file_name = '%i_%s_%i.txt' % (qid, common.get_datetime('%Y-%m-%d_%H-%M'), temp // 500)
                        with open(file_name, 'a+') as f:
                            f.write(src + '\n')
                        # Record it as seen to prevent duplicate downloads.
                        with open(done_down_path, 'a+') as f:
                            f.write(img_name + '\n')
                    else:
                        print('第' + str(temp + 1) + '个已存在:' + src)
                        if img_name in readLinesCopy:
                            # Copy old names into the per-question done file so
                            # the global doneDown.text does not keep growing.
                            with open(done_down_path, 'a+') as f:
                                f.write(img_name + '\n')
        # Advance to the next page; sleep to stay polite to the API.
        next_page = loads.get('paging').get('next')
        print('下一页:' + next_page)
        time.sleep(3)
def save_fail_url(urls):
    """Append failed download records to today's un_down log.

    urls: iterable of dicts carrying 'url' and 'title' keys.
    """
    log_path = (file_dir + 'un_down_' + common.get_datetime('%Y-%m-%d')
                + '.log')
    try:
        with open(log_path, 'r', encoding='utf-8') as f:
            existing = f.readlines()
        logger.info('写入前文本数量:%i, 写入数量:%i;写入后总量%i'
                    % (len(existing), len(urls), len(existing) + len(urls)))
    except IOError:
        # First write of the day: the log does not exist yet.
        logger.error('error: 没有找到文件或读取文件失败,待写入数量:%i' % len(urls))
    with open(log_path, 'a+', encoding='utf-8') as f:
        for entry in urls:
            f.write('%s:[%s,%s]\n' % (common.get_datetime('%Y/%m/%d %H:%M'),
                                      entry['url'].strip('\n'), entry['title']))
def set_date(self, field, date):
    """Open the date picker for `field` and select `date` (defaults to now).

    date: datetime-like exposing .year and .day; None means "current time".
    """
    if date is None:
        # Fixed: `common.get_datetime()()` called the RETURNED datetime
        # object, raising TypeError; elsewhere in the suite the return value
        # of common.get_datetime() is used directly.
        date = common.get_datetime()
    self.click(field)
    # Navigate the picker until the displayed year reaches the target year.
    # NOTE(review): stepping month_prev to move the year forward looks
    # suspicious — confirm how the picker widget wraps years.
    while int(self.get_text(MainPageLocators.year)) < date.year:
        self.click(MainPageLocators.month_prev)
    # Select the day cell containing the target day number.
    self.click(self.get_element_contains_text(MainPageLocators.date, date.day))
def check_payoff(self, debt_1, debt_2):
    # Verify the payoff plan steps for two debts paid in order (debt_1 first).
    # Check first step of payment
    debt_1_payment = debt_1.minimum_payment
    debt_2_payment = debt_2.minimum_payment
    step_number = 0
    if debt_1.remainder:
        # debt_1 does not divide evenly: its last payment is smaller, which
        # adds one extra (shorter) step to the plan.
        step_number = 1
        self.check_step_details(0, debt_1.debt_name, debt_1.minimum_payment, (debt_1.number_of_payments - 1))
        self.check_step_details(0, debt_2.debt_name, debt_2.minimum_payment, (debt_1.number_of_payments - 1))
        # Check the last step of the first payment.
        debt_1_payment = debt_1.starting_balance - debt_1.minimum_payment * (debt_1.number_of_payments - 1)
        # Money freed up by debt_1's smaller last payment rolls into debt_2.
        debt_2_payment = debt_2.minimum_payment + debt_1.minimum_payment - debt_1_payment
        self.check_step_details(1, debt_1.debt_name, debt_1_payment, 1)
        if debt_2.remainder:
            self.check_step_details(1, debt_2.debt_name, debt_2_payment, 1)
    else:
        self.check_step_details(0, debt_1.debt_name, debt_1_payment, debt_1.number_of_payments)
        self.check_step_details(0, debt_2.debt_name, debt_2_payment, debt_1.number_of_payments)
    self.check_step_debt_paid(0, debt_1)
    if debt_1.number_of_payments != debt_2.number_of_payments:
        # Check first step of the second debt after payoff of the first debt:
        # debt_1's freed-up minimum now also goes to debt_2.
        minimum_payment = debt_2.minimum_payment + debt_1.minimum_payment
        left_to_pay = debt_2.starting_balance - (
            debt_2.minimum_payment * debt_1.number_of_payments + debt_1.minimum_payment - debt_1_payment
        )
        left_duration = int(left_to_pay / minimum_payment)
        if left_to_pay <= minimum_payment:
            # Remaining balance fits in a single final step.
            self.check_step_details(step_number + 1, debt_2.debt_name, left_to_pay, 1)
        if left_duration > 1:
            self.check_step_details(step_number + 1, debt_2.debt_name, minimum_payment, left_duration)
            last_payment = left_to_pay - minimum_payment * left_duration
            self.check_step_details(step_number + 2, debt_2.debt_name, last_payment, 1)
        # Recompute debt_2's payoff date: debt_1's term plus the remaining steps.
        debt_2.debt_free_on = common.add_months(
            common.get_datetime(), debt_1.number_of_payments + left_duration + 1
        ).strftime("%b %Y")
        debt_2.debt_free_years_month = common.get_years_month_debt_free(
            debt_1.number_of_payments + left_duration + 1
        )
    self.check_step_debt_paid(1, debt_2)
def write_txt(params):
    """Collect image URLs for one question and write unseen ones to batch files.

    Serialized with gLock because several workers share cur_dir and the
    done files. params: dict with key 'question_id'.
    """
    gLock.acquire()
    try:
        os.chdir(cur_dir)
        question_id = params['question_id']
        save_question_id(question_id_path, question_id)
        # Fetch the list of image URLs for this question.
        img_list = get_image_url(question_id)
        if len(img_list) == 0:
            return
        temp = 0
        done_down_path = done_down_file_path % question_id
        # Fixed: the old open(..., 'w+') TRUNCATED the done log before
        # reading it, wiping every previously recorded name. 'a+' still
        # creates the file when missing; seek(0) rewinds for reading.
        with open(done_down_path, 'a+') as file_obj:
            file_obj.seek(0)
            readLines = file_obj.read().splitlines()
        with open(parent_dir + '/doneDown.text') as obj:
            readLinesCopy = obj.read().splitlines()
        readLines.extend(readLinesCopy)
        print('done-length:', end=" ")
        print(len(readLines))
        for i in range(0, len(img_list)):
            img_url = img_list[i]
            img_name = os.path.basename(img_url)
            temp += 1
            os.chdir(cur_dir)
            if img_name not in readLines:
                # Batch output: a new txt file every 500 links.
                file_name = '%i_%s_%i.txt' % (
                    question_id, common.get_datetime('%Y-%m-%d_%H-%M'),
                    temp // 500)
                with open(file_name, 'a+') as f:
                    f.write(img_url + '\n')
                # Record as seen to prevent duplicate downloads.
                with open(done_down_path, 'a+') as f:
                    f.write(img_name + '\n')
            else:
                print('第' + str(i + 1) + '个已存在:' + img_url)
                if img_name in readLinesCopy:
                    # Copy old names forward so doneDown.text does not grow.
                    with open(done_down_path, 'a+') as f:
                        f.write(img_name + '\n')
    finally:
        # Fixed: the early `return` above previously skipped gLock.release(),
        # deadlocking every other worker.
        gLock.release()
def test_debt_dialog_validation_promo_apr(dpp):
    # Validate the "Promo APR" field: values outside [0, 99] must raise the
    # range error; boundary and in-range values must be accepted.
    error_message = '"Promo APR" is out of range: 0<=Promo APR<=99'
    base_page = page.BasePage(dpp)
    main_page = base_page.open_main_page_as_guest()
    main_page.click(MainPageLocators.add_button)
    # Fill the rest of the dialog with arbitrary valid data.
    main_page.send_keys(MainPageLocators.debt_name_edit, "Name")
    main_page.send_keys(MainPageLocators.debt_balance_edit, randint(1, 10000000))
    main_page.send_keys(MainPageLocators.debt_minimum_edit, randint(1, 10000000))
    main_page.send_keys(MainPageLocators.debt_apr_edit, randint(0, 99))
    main_page.click(MainPageLocators.has_promo)
    main_page.set_date(MainPageLocators.promo_expires_date, common.get_datetime())
    # Out-of-range values: expect the validation error message.
    main_page.validation_check(MainPageLocators.promo_apr, MainPageLocators.save_button, "-1", error_message)
    main_page.validation_check(MainPageLocators.promo_apr, MainPageLocators.save_button, "-0.01", error_message)
    main_page.validation_check(MainPageLocators.promo_apr, MainPageLocators.save_button, "99.1", error_message)
    main_page.validation_check(MainPageLocators.promo_apr, MainPageLocators.save_button, "100", error_message)
    # Boundary and in-range values: expect acceptance.
    main_page.validate_debt_field(MainPageLocators.promo_apr, 0)
    main_page.validate_debt_field(MainPageLocators.promo_apr, 0.01)
    main_page.validate_debt_field(MainPageLocators.promo_apr, 0.99)
    main_page.validate_debt_field(MainPageLocators.promo_apr, 99)
def down_noval(params):
    """Download novel text pages listed in local txt files into a month folder.

    params keys: down_file_path (base directory), select_str (CSS selector
    for the text nodes), require_pre_url (bool), pre_url (prefix added to
    each line when require_pre_url is set). Pages whose selector matches
    nothing are logged into a "未下载" txt file instead.
    """
    down_path = params['down_file_path'] + cur_month
    select_str = params['select_str']
    require_pre_url = params['require_pre_url']
    if require_pre_url:
        pre_url_ = params['pre_url']
    if not os.path.exists(down_path):
        os.makedirs(down_path)
    file_name_list = common.get_file_name_list(cur_dir, 'txt')
    for index, file_name in enumerate(file_name_list, 1):
        print('下载第 %i 个文件: %s' % (index, file_name))
        with open(file_name) as file_obj:
            for num, value in enumerate(file_obj, 1):
                line = value.strip('\n')
                if require_pre_url:
                    line = pre_url_ + line
                print('第 %i 行: %s' % (num, line))
                soup = common.get_beauty_soup(line)
                title = common.replace_special_char(soup.title.string)
                # Strip any leading "www..." site prefix from the title.
                newTitle = title.split('www')[-1]
                print(str(newTitle.strip()))
                textContent = soup.select(select_str)
                print('text数量:' + str(len(textContent)))
                if len(textContent) == 0:
                    # NOTE(review): the old code called os.chdir(os.getcwd())
                    # here — a no-op (removed); it likely intended cur_dir.
                    with open(common.get_datetime('%Y-%m-%d') + '_未下载.txt',
                              'a+') as f:
                        f.write('第' + str(num) + '行:' + line + ','
                                + newTitle + '\n')
                else:
                    os.chdir(down_path)
                    with open(newTitle.strip() + '.txt', 'a+',
                              encoding='utf8') as f:
                        # The per-iteration os.chdir(down_path) was hoisted
                        # out of this loop (it was loop-invariant).
                        for part in textContent:
                            f.write(part.text + '\n')
    print("-----down over----------------")
def __init__(self, debt_name, starting_balance, minimum_payment = None, apr = None, number_of_payments = None, category = None, payment_due_date = None, payoff_progress = None):
    """Debt model: derives whichever of minimum_payment / number_of_payments
    is missing, adds interest via common.get_total_interest, and computes the
    projected debt-free date.

    remainder is set True when the payments don't divide the total evenly,
    i.e. the final payment is smaller than the others.
    """
    self.debt_name = debt_name
    self.starting_balance = starting_balance
    self.minimum_payment = minimum_payment
    self.number_of_payments = number_of_payments
    self.apr = apr
    self.category = category
    self.payment_due_date = payment_due_date
    # Fixed: this line was `self.payoff_progress = apr`, silently replacing
    # any caller-supplied progress value with the APR.
    self.payoff_progress = payoff_progress
    self.remainder = False
    if minimum_payment is None and number_of_payments is not None:
        self.minimum_payment = math.ceil(starting_balance / number_of_payments)
        if starting_balance % number_of_payments != 0:
            self.number_of_payments = int(number_of_payments + 1)
            self.remainder = True
    if number_of_payments is None and minimum_payment is not None:
        self.number_of_payments = math.ceil(starting_balance / minimum_payment)
        if starting_balance % minimum_payment != 0:
            self.number_of_payments = int(self.number_of_payments + 1)
            self.remainder = True
    if apr is None:
        self.apr = 0
        self.total_interest = 0
    else:
        self.total_interest = common.get_total_interest(
            self.starting_balance, self.minimum_payment,
            self.number_of_payments, self.apr)
        # Interest stretches the payoff: recompute the payment count from
        # the grand total owed (principal + interest).
        self.number_of_payments = math.floor(
            (self.starting_balance + self.total_interest) / self.minimum_payment)
        if (self.starting_balance + self.total_interest) % self.minimum_payment != 0:
            self.number_of_payments = int(self.number_of_payments + 1)
            self.remainder = True
    if payoff_progress is None:
        self.payoff_progress = 0
    date = common.get_datetime()
    self.debt_free_on = common.add_months(date, self.number_of_payments).strftime('%b %Y')
    self.debt_free_years_month = common.get_years_month_debt_free(self.number_of_payments)
def get_page_list(page_url):
    """Scrape one listing page and queue the detail links not yet downloaded.

    New links go into batched txt files (500 per file) and are appended to
    car_home_done.text so subsequent runs skip them.
    """
    response = requests.get(page_url, headers=carhome.header,
                            proxies=carhome.proxy_ip)
    soup = BeautifulSoup(response.text, 'lxml')
    title = soup.title.string
    results = soup.select(
        "body div div[class='choise-con'] ul[class='content'] li div[class='pic-box'] a")
    print('当前页获取到的连接数:%i' % len(results))
    num = 0
    for anchor in results:
        link = 'https:%s' % anchor.get('href')
        if link in done_list:
            print("当前链接已下载")
            continue
        num += 1
        batch_name = '%s-%s_%i.txt' % (
            title, common.get_datetime('%Y-%m-%d_%H%M'), num // 500)
        with open(batch_name, 'a+') as fileObj:
            fileObj.write(link + '\n')
        # Record the link so it is not fetched again on later runs.
        with open('car_home_done.text', 'a+') as fileO:
            fileO.write(link + '\n')
def __init__(self, url, proxy_ip=None, encoding='utf-8', timeout=10):
    """Fetch `url` and expose .beautySoup (lxml tree), .title and .status_code.

    proxy_ip: requests-style proxies mapping; None selects a random proxy
        per call. (Fixed: the old default called get_random_proxy_ip() once
        at class-definition time, so every no-proxy call reused one proxy.)
    On failure, .title is set to the last status code — or None when the
    request itself failed — and the error is logged.
    """
    # Pre-initialize so the except branch below can read it even when
    # requests.get raised before any status was received (the old code hit
    # AttributeError in that case).
    self.status_code = None
    if proxy_ip is None:
        proxy_ip = ProxyIp.ProxyIp().get_random_proxy_ip()
    try:
        html = requests.get(url, headers=common.header, proxies=proxy_ip,
                            timeout=timeout)
        self.status_code = html.status_code
        html.encoding = encoding
        self.beautySoup = BeautifulSoup(html.text, 'lxml')
        self.title = common.replace_sub(self.beautySoup.title.string).strip()
    except Exception as e:
        self.title = self.status_code
        logger.info("BeautySoupTool 请求失败,{},{}".format(
            common.get_datetime('%Y/%m/%d %H:%M'), e))
import os
import common

# Script: download "linglei" novels listed in local txt files into a
# per-day folder.
path = 'D:/down/linglei/%s/' % common.get_datetime('%Y-%m-%d')
if not (os.path.exists(path)):
    os.makedirs(path)
file_list = common.get_file_name_list(os.getcwd(), 'txt')
for index, file_name in enumerate(file_list, 1):
    print('下载第 %i 个文件: %s' % (index, file_name))
    # Open the url-list file and process it line by line.
    with open(file_name) as file_obj:
        for num, value in enumerate(file_obj, 1):
            line = value.strip('\n')
            print('第 %i 行: %s' % (num, line))
            itemSoup = common.get_beauty_soup(line)
            title = common.replace_special_char(itemSoup.title.string)
            # Strip any leading "www..." site prefix from the title.
            newTitle = title.split('www')[-1]
            print(str(newTitle.strip()))
            textContent = itemSoup.select(
                "body div[class='maomi-content'] main[id='main-container'] div[class='content']")
            if len(textContent) == 0:
                # Nothing matched: log this line as not-downloaded.
                with open('wuxia_%s_未下载.txt' % common.get_datetime('%Y-%m-%d'), 'a+') as f:
                    f.write('第 %i 行:%s; %s \n' % (num, line, newTitle))
            else:
                os.chdir(path)
                with open(newTitle.strip() + '.txt', 'a+', encoding='utf8') as f:
                    # NOTE(review): this chunk appears truncated here — the
                    # write loop over textContent is missing from this view.
import os
import common

# Script: download "changpian" (long-form) novels listed in local txt files.
down_path = 'D:/down/changpian/%s/' % common.get_datetime('%Y-%m-%d')
if not (os.path.exists(down_path)):
    os.makedirs(down_path)
file_list = common.get_file_name_list(os.getcwd(), 'txt')
for index, file_name in enumerate(file_list, 1):
    print('下载第 %i 个文件: %s' % (index, file_name))
    # Open the url-list file and process it line by line.
    with open(file_name) as file_obj:
        for num, value in enumerate(file_obj, 1):
            line = value.strip('\n')
            print('第 %i 行: %s' % (num, line))
            itemSoup = common.get_beauty_soup(line)
            title = common.replace_special_char(itemSoup.title.string)
            # Strip any leading "www..." site prefix from the title.
            newTitle = title.split('www')[-1]
            print(str(newTitle.strip()))
            textContent = itemSoup.select(
                "body div[class='main'] div[class='contentList'] div[class='content'] p"
            )
            print('text数量:' + str(len(textContent)))
            if len(textContent) == 0:
                # Nothing matched: log this line as not-downloaded.
                with open(
                        'changpian_%s_未下载.txt'
                        % common.get_datetime('%Y-%m-%d') + '', 'a+') as f:
                    # NOTE(review): this chunk appears truncated here — the
                    # write call and the download branch are missing from view.
def test_add_principal_payment(dpp):
    """Guest adds a debt, records a 20-unit payment dated now, and verifies it."""
    main_page = page.BasePage(dpp).open_main_page_as_guest()
    main_page.add_debt()
    amount = 20
    main_page.add_payment_ammount(amount, common.get_datetime())
    main_page.check_payment_ammount(amount)
import os
import common

# Script: download "qinggan" (romance) novels listed in local txt files.
path = 'D:/down/qinggan/%s/' % common.get_datetime('%Y-%m-%d')
if not (os.path.exists(path)):
    os.makedirs(path)
file_list = common.get_file_name_list(os.getcwd(), 'txt')
for index, file_name in enumerate(file_list, 1):
    print('下载第 %i 个文件: %s' % (index, file_name))
    # Open the url-list file and process it line by line.
    with open(file_name) as file_obj:
        for num, value in enumerate(file_obj, 1):
            line = value.strip('\n')
            print('第 %i 行: %s' % (num, line))
            itemSoup = common.get_beauty_soup(line)
            title = common.replace_special_char(itemSoup.title.string)
            # Strip any leading "www..." site prefix from the title.
            newTitle = title.split('www')[-1]
            print(str(newTitle.strip()))
            textContent = itemSoup.select(
                "body div[class='maomi-content'] main[id='main-container'] div[class='content']"
            )
            if len(textContent) == 0:
                # Nothing matched: log this line as not-downloaded.
                with open(
                        'qinggan_%s_未下载.txt' % common.get_datetime('%Y-%m-%d'),
                        'a+') as f:
                    f.write('第 %i 行:%s ; %s \n' % (num, line, newTitle) + str(num))
                    # NOTE(review): this chunk appears truncated — the download
                    # (else) branch is missing from this view.
import os
import common

# Crawl the "情感小说" (romance novel) listing pages and dump every article
# link into batched txt files (500 links per file).
temp = 0
for i in range(1, 47):
    print('第 %i 页' % i)
    # Fixed: the page number was never substituted into the template — the
    # literal "...-%i.html" URL was fetched 46 times. Apply % formatting
    # exactly like the print above.
    url = "https://www.4455sk.com/xiaoshuo/list-情感小说-%i.html" % i
    print(url)
    soup = common.get_beauty_soup(url)
    itemUrl = soup.select(
        "body div[class='maomi-content'] main[id='main-container'] div[class='text-list-html'] div ul li a"
    )
    for j in range(0, len(itemUrl)):
        fileUrl = itemUrl[j].get('href')
        if fileUrl is not None and fileUrl.find('.html') >= 0:
            fileUrl = "https://www.4455sk.com/" + fileUrl
            temp += 1
            print("fileUrl:" + fileUrl)
            os.chdir(os.getcwd())
            # Batch output: a new txt file every 500 links.
            with open(
                    'qinggan_%s_%i.txt' %
                    (common.get_datetime('%Y-%m-%d'), temp // 500),
                    'a+') as f:
                f.write(fileUrl + '\n')
print("打印完成")
# Download roots for the "自拍达人原创申请" (selfie original-application)
# highlights board, one constant per storage location.
DOWN_PATH_JH_ZPDR_OS = user_dir + 'Pictures/Camera Roll/P**N/自拍达人原创申请_JH'
DOWN_PATH_JH_ZPDR_F = 'F:/P**N/精华/自拍达人原创申请_JH'
DOWN_PATH_JH_ZPDR_D = 'D:/P**N/自拍达人原创申请_JH'
DOWN_PATH_JH_ZPDR_Linux = '/usr/local/src/P**N/自拍达人原创申请_JH'
# Download roots for the "原创自拍区" (original selfies) board.
DOWN_PATH_YCZP_OS = user_dir + '/Pictures/Camera Roll/all/原创自拍区'
DOWN_PATH_YCZP_F = 'F:/图片/p**n/ALL/原创自拍区'
DOWN_PATH_YCZP_D = 'D:/p**n/ALL/原创自拍区'
DOWN_PATH_YCZP_Linux = '/usr/local/src/p**n/ALL/原创自拍区'
# Current working directory, with trailing separator.
cur_dir = os.getcwd() + os.sep
# Current month as a path component, e.g. "/2023-05/".
cur_month = os.sep + common.get_datetime('%Y-%m') + os.sep
# Default request headers used by the downloaders.
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 UBrowser/6.1.2107.204 Safari/537.36'
}
# Site base URL (alternate mirror kept below for reference).
pre_url = 'https://f.w24.rocks/'
# pre_url = 'http://f.wonderfulday25.live/'
# Selfie-board listing URL templates; the page number fills %i, e.g.
# .../forumdisplay.php?fid=19&orderby=dateline&filter=2592000&page=2
down_url_zpdr = 'forumdisplay.php?fid=19&orderby=dateline&filter=2592000&page=%i'
down_url_zpdr_jh = 'forumdisplay.php?fid=19&orderby=dateline&filter=digest&page=%i'
# "Interest sharing" board listing URL template.
down_url_xqfx = 'forumdisplay.php?fid=33&orderby=dateline&filter=2592000&page=%i'