Exemple #1
0
# STEP 5 Fetch #
print('Fetching start!...')
# Posts are processed in sorted key order rather than in whatever order the
# mapping yields them. A fixed order matters because, when a broken post page
# is hit, the run does not restart from that post by itself; with a stable
# order it is at least clear how far the run got.
important_info_key_list = sorted(important_info)
# Walk the posts in sorted key order. Each key is the expected post name and
# its value in important_info is the URL that gets fetched.
for key in important_info_key_list:
    name = key
    href = important_info[name]
    print('Fetching %s...' % name)
    print('URL %s' % href)

    # Sanity check: warn (but keep going) when the fetched page title does
    # not contain the expected name.
    title = p9b.get_title_from_url(href)
    print('TITLE %s' % title)
    if title.find(name) == -1:
        print('WARN The title does not contain %s.' % name)
        print('Something may be wrong.')
    # Build the Post and run match() and migration() before asking for the
    # floors. NOTE(review): what those two steps do is defined in p9c, not
    # in this excerpt -- confirm before reordering them.
    post = p9c.Post(href)
    post.match()
    post.migration()
    floors = post.get_real_floors()
    if not floors:
        # A falsy result stops the whole loop, not just this post; the
        # remaining keys are not processed.
        print('ERROR Can not get info from url.')
        break

    # Fresh list for every post; it is not populated within this excerpt.
    important_info_list = list()
    for real_floor in floors:
        # Each floor is indexed by the 'comments' key here.
        # NOTE(review): the body of this check is cut off in this excerpt.
        if real_floor['comments'] is None:
Exemple #2
0
# The progress message is emitted before the project imports below run.
# NOTE(review): printl is defined outside this excerpt; presumably it prints
# without a trailing newline so the later "[OK]" lands on the same line --
# confirm.
printl("Initializing...")
from P0009 import btbb as p9b
from P0009 import btbbc as p9c

# API #
# URL of the root thread; passed to p9b.get_title_from_url() and p9c.Post()
# further down.
ROOT_URL = "http://tieba.baidu.com/p/4260990232"
# Not referenced within this excerpt. NOTE(review): presumably floor
# identifiers to skip -- verify against the code that consumes it.
IGNORE_FLOOR = []
# Hard-coded absolute path (contains spaces); not referenced within this
# excerpt. NOTE(review): presumably the output file -- confirm.
STORE_FILE_NAME = "/home/wan/N_E_I_H version1.0/C0011/233.txt"
print("[OK]")

# FETCH DATA #
# Results gathered from the root thread are collected in this dict.
data = {}

# Title: announce the request, fetch it, confirm, then echo what came back.
printl("Fetching title of url: '%s'..." % ROOT_URL)
data["title"] = p9b.get_title_from_url(ROOT_URL)
print("[OK]")
print("TITLE %s" % data["title"])

# Root post object built from the same URL.
printl("Fetching data of url: '%s'..." % ROOT_URL)
root_post = p9c.Post(ROOT_URL)
print("[OK]")

# Step 1: match() followed by migration() on the root post, in that order.
printl("Process step #1...")
root_post.match()
root_post.migration()
print("[OK]")

# Step 2: read the floors of the root post and start with an empty child
# list. This announcement uses print, not printl.
print("Process step #2...")
root_info = root_post.get_real_floors()
child_list = []