username = browser.find_element_by_name('username') username.send_keys(cred_username) password = browser.find_element_by_name('password') password.send_keys(cred_password + Keys.RETURN) print( 'Download started. Kill process to terminate. Ctrl-C to skip current task') # download html files success_count = 0 failure_count = 0 t_count = len(tids_list) for tid in tids_list: try: url = generate_thread_url(tid, 1) if sleeptime: print('sleeping...') sleep(sleeptime) print('Downloading from:', url) browser.get(url) with open('%s/html/%s' % (path, parse.quote_plus(url)), 'w') as s: s.write(browser.page_source) success_count += 1 print('OK', success_count, '/', t_count) except Exception as inst: print('There is an error:') print(type(inst)) print(inst.args) failure_count += 1 print('Download finished,', success_count, 'succeeded,', failure_count,
def download_first_page():
    """Download page 1 of the thread identified by ``start_tid``.

    Builds the URL with ``generate_thread_url`` and hands it to
    ``download_html``; a progress message is printed first.
    """
    print('Downloading the first page...')
    first_page_url = generate_thread_url(start_tid, 1)
    download_html(first_page_url)
# -*- coding:utf-8 -*-
"""
Used to download html files directly (without login) from the board.
This program will download threads with tid in the tids_list.
tids list is loaded from ./pickle folder
"""
import pickle
from time import sleep

from config import path, sleeptime, start_tid
from utils import download_html, generate_thread_url

# pickle.load() requires a binary file object; passing the path string
# directly raises TypeError, so open the file first.
# NOTE(review): unpickling can execute arbitrary code -- only load pickle
# files produced by this project.
with open("%s/pickle/tids_from_thread_%s.p" % (path, start_tid), "rb") as tids_file:
    tids_list = pickle.load(tids_file)

for tid in tids_list:
    # Best-effort loop: a failure on one thread is reported and the loop
    # moves on to the next tid.
    try:
        if sleeptime:
            # Pause between requests when a delay is configured.
            print("sleeping...")
            sleep(sleeptime)
        print("downloading:", tid)
        download_html(generate_thread_url(tid, 1))
    except Exception as inst:
        print("There is an error:")
        print(type(inst), inst.args)
def __init__(self, tid, pid):
    """Store the thread id and page id and derive the page URL.

    Args:
        tid: thread id, passed to ``generate_thread_url``.
        pid: page id, passed to ``generate_thread_url``.
    """
    self.tid, self.pid = tid, pid
    # Starts empty; nothing is extracted at construction time.
    self.extracted_tids = list()
    page_url = generate_thread_url(self.tid, self.pid)
    self.url = page_url
# -*- coding:utf-8 -*-
'''
Used to download html files directly (without login) from the board.
This program will download threads with tid in the tids_list.
tids list is loaded from ./pickle folder
'''
import pickle
from time import sleep

from config import path, sleeptime, start_tid
from utils import download_html, generate_thread_url

# pickle.load() requires a binary file object; passing the path string
# directly raises TypeError, so open the file first.
# NOTE(review): unpickling can execute arbitrary code -- only load pickle
# files produced by this project.
with open('%s/pickle/tids_from_thread_%s.p' % (path, start_tid), 'rb') as tids_file:
    tids_list = pickle.load(tids_file)

for tid in tids_list:
    # Best-effort loop: a failure on one thread is reported and the loop
    # moves on to the next tid.
    try:
        if sleeptime:
            # Pause between requests when a delay is configured.
            print('sleeping...')
            sleep(sleeptime)
        print('downloading:', tid)
        download_html(generate_thread_url(tid, 1))
    except Exception as inst:
        print('There is an error:')
        print(type(inst), inst.args)
def __init__(self, tid, pid):
    """Store the thread id and page id and derive the page URL.

    Args:
        tid: thread id, passed to ``generate_thread_url``.
        pid: page id, passed to ``generate_thread_url``.
    """
    self.tid, self.pid = tid, pid
    # Starts empty; nothing is extracted at construction time.
    self.extracted_tids = list()
    page_url = generate_thread_url(self.tid, self.pid)
    self.url = page_url
# Fill in the login form; sending RETURN after the password submits it.
username = browser.find_element_by_name("username")
username.send_keys(cred_username)
password = browser.find_element_by_name("password")
password.send_keys(cred_password + Keys.RETURN)
print("Download started. Kill process to terminate. Ctrl-C to skip current task")

# download html files
success_count = 0
failure_count = 0
t_count = len(tids_list)
for tid in tids_list:
    try:
        url = generate_thread_url(tid, 1)
        if sleeptime:
            # Pause between requests when a delay is configured.
            print("sleeping...")
            sleep(sleeptime)
        print("Downloading from:", url)
        browser.get(url)
        # The URL is percent-encoded so it can serve as a file name.
        # NOTE(review): the file is written with the platform default
        # encoding; confirm that matches whatever reads these files back.
        with open("%s/html/%s" % (path, parse.quote_plus(url)), "w") as s:
            s.write(browser.page_source)
        success_count += 1
        print("OK", success_count, "/", t_count)
    except KeyboardInterrupt:
        # KeyboardInterrupt is not a subclass of Exception, so without this
        # handler Ctrl-C would abort the whole run instead of skipping the
        # current task as the banner above promises.
        print("Skipped by user.")
        failure_count += 1
    except Exception as inst:
        # Best-effort: report the failure and continue with the next tid.
        print("There is an error:")
        print(type(inst))
        print(inst.args)
        failure_count += 1
print("Download finished,", success_count, "succeeded,", failure_count, "failed. Files saved in html folder.")
def download_first_page():
    """Download page 1 of the thread identified by ``start_tid``.

    Builds the URL with ``generate_thread_url`` and hands it to
    ``download_html``; a progress message is printed first.
    """
    print('Downloading the first page...')
    first_page_url = generate_thread_url(start_tid, 1)
    download_html(first_page_url)