コード例 #1
0
    username = browser.find_element_by_name('username')
    username.send_keys(cred_username)

    password = browser.find_element_by_name('password')
    password.send_keys(cred_password + Keys.RETURN)

print(
    'Download started. Kill process to terminate. Ctrl-C to skip current task')

# Download page 1 of every thread in tids_list through the browser and
# save the page source under <path>/html/, keeping success/failure tallies.
success_count = 0
failure_count = 0
t_count = len(tids_list)
for tid in tids_list:
    try:
        url = generate_thread_url(tid, 1)
        if sleeptime:
            # Optional delay between requests; skipped when sleeptime is falsy.
            print('sleeping...')
            sleep(sleeptime)
        print('Downloading from:', url)
        browser.get(url)
        # The quoted URL doubles as the file name
        # (parse is presumably urllib.parse -- confirm against the imports).
        with open('%s/html/%s' % (path, parse.quote_plus(url)), 'w') as s:
            s.write(browser.page_source)
        success_count += 1
        print('OK', success_count, '/', t_count)
    except KeyboardInterrupt:
        # KeyboardInterrupt derives from BaseException, so the handler below
        # never sees it. Catch it explicitly so Ctrl-C skips only the current
        # thread, as the start-up message promises.
        print('Skipped by user:', tid)
        failure_count += 1
    except Exception as inst:
        # Best effort: report the problem and carry on with the next thread.
        print('There is an error:')
        print(type(inst))
        print(inst.args)
        failure_count += 1
print('Download finished,', success_count, 'succeeded,', failure_count,
コード例 #2
0
def download_first_page():
    """Announce and download page 1 of the thread identified by start_tid."""
    print('Downloading the first page...')
    first_page_url = generate_thread_url(start_tid, 1)
    download_html(first_page_url)
コード例 #3
0
# -*- coding:utf-8 -*-

"""Download html files directly (without login) from the board.

This program downloads page 1 of every thread whose tid is in tids_list.
The tids list is loaded from <path>/pickle/tids_from_thread_<start_tid>.p.
"""

import pickle
from time import sleep

from config import path, sleeptime, start_tid
from utils import download_html, generate_thread_url

# pickle.load() expects a binary file object, not a path string, so the
# file has to be opened first.
# NOTE(review): unpickling can execute arbitrary code; only load pickle
# files that this project produced itself.
with open("%s/pickle/tids_from_thread_%s.p" % (path, start_tid), "rb") as tids_file:
    tids_list = pickle.load(tids_file)

for tid in tids_list:
    try:
        if sleeptime:
            # Optional delay between requests; skipped when sleeptime is falsy.
            print("sleeping...")
            sleep(sleeptime)
        print("downloading:", tid)
        download_html(generate_thread_url(tid, 1))
    except Exception as inst:
        # Best effort: report the failure and continue with the next thread.
        print("There is an error:")
        print(type(inst), inst.args)
コード例 #4
0
 def __init__(self, tid, pid):
     """Remember tid and pid and build the matching thread URL.

     The URL comes from generate_thread_url(tid, pid); pid is presumably
     the page number (confirm against generate_thread_url). The list of
     extracted tids starts out empty.
     """
     self.extracted_tids = []
     self.tid, self.pid = tid, pid
     self.url = generate_thread_url(self.tid, self.pid)
コード例 #5
0
# -*- coding:utf-8 -*-
'''Download html files directly (without login) from the board.

This program downloads page 1 of every thread whose tid is in tids_list.
The tids list is loaded from <path>/pickle/tids_from_thread_<start_tid>.p.
'''

import pickle
from time import sleep

from config import path, sleeptime, start_tid
from utils import download_html, generate_thread_url

# pickle.load() expects a binary file object, not a path string, so the
# file has to be opened first.
# NOTE(review): unpickling can execute arbitrary code; only load pickle
# files that this project produced itself.
with open('%s/pickle/tids_from_thread_%s.p' % (path, start_tid), 'rb') as tids_file:
    tids_list = pickle.load(tids_file)

for tid in tids_list:
    try:
        if sleeptime:
            # Optional delay between requests; skipped when sleeptime is falsy.
            print('sleeping...')
            sleep(sleeptime)
        print('downloading:', tid)
        download_html(generate_thread_url(tid, 1))
    except Exception as inst:
        # Best effort: report the failure and continue with the next thread.
        print('There is an error:')
        print(type(inst), inst.args)
コード例 #6
0
 def __init__(self, tid, pid):
     """Set up the instance for one (tid, pid) pair.

     Stores both ids, starts with an empty extracted_tids list and
     derives url by calling generate_thread_url with the stored ids.
     """
     self.tid = tid
     self.pid = pid
     self.url = generate_thread_url(self.tid, self.pid)
     self.extracted_tids = []
コード例 #7
0
    username = browser.find_element_by_name("username")
    username.send_keys(cred_username)

    password = browser.find_element_by_name("password")
    password.send_keys(cred_password + Keys.RETURN)

print("Download started. Kill process to terminate. Ctrl-C to skip current task")

# Download page 1 of every thread in tids_list through the browser and
# save the page source under <path>/html/, keeping success/failure tallies.
success_count = 0
failure_count = 0
t_count = len(tids_list)
for tid in tids_list:
    try:
        url = generate_thread_url(tid, 1)
        if sleeptime:
            # Optional delay between requests; skipped when sleeptime is falsy.
            print("sleeping...")
            sleep(sleeptime)
        print("Downloading from:", url)
        browser.get(url)
        # The quoted URL doubles as the file name
        # (parse is presumably urllib.parse -- confirm against the imports).
        with open("%s/html/%s" % (path, parse.quote_plus(url)), "w") as s:
            s.write(browser.page_source)
        success_count += 1
        print("OK", success_count, "/", t_count)
    except KeyboardInterrupt:
        # KeyboardInterrupt derives from BaseException, so the handler below
        # never sees it. Catch it explicitly so Ctrl-C skips only the current
        # thread, as the start-up message promises.
        print("Skipped by user:", tid)
        failure_count += 1
    except Exception as inst:
        # Best effort: report the problem and carry on with the next thread.
        print("There is an error:")
        print(type(inst))
        print(inst.args)
        failure_count += 1
print("Download finished,", success_count, "succeeded,", failure_count, "failed. Files saved in html folder.")
コード例 #8
0
def download_first_page():
    """Print a progress message, then fetch the URL built for (start_tid, 1)."""
    print('Downloading the first page...')
    url = generate_thread_url(start_tid, 1)
    download_html(url)