Exemple #1
0
def get_one_page(utils):
    """Render the single page named by ``opt.page`` to HTML, then exit.

    The page is fetched with ``utils.GetPage``, parsed by
    ``AppleNews.page_parser``, composed by
    ``SaveAppleNewsToHtml.page_compose`` and written to
    ``one_page/apple.html`` between the header and tail emitted by
    ``PastHeader`` / ``PastTail``.

    Args:
        utils: helper object; this function calls ``utils.mkdir(path, True)``
            and ``utils.GetPage(url)`` on it.

    Reads the module-level ``opt`` (for ``opt.page``) and ``logger``.

    Raises:
        SystemExit: always -- the function finishes by calling ``sys.exit()``
            and therefore never returns to its caller.
    """
    time_one_page = time()
    news_api = AppleNews(html_r(), 'http://www.appledaily.com.tw/')
    api = SaveAppleNewsToHtml(news_api.home_url)
    api.store_in = "one_page/"
    utils.mkdir(api.store_in, True)
    utils.mkdir(api.store_in + '/img', True)

    # The context manager guarantees the output file is flushed and closed
    # before sys.exit() below, including when fetching or parsing raises.
    # Call order inside the block is the same as before the change.
    with open(api.store_in + 'apple.html', 'w') as f:
        page_content = utils.GetPage(opt.page)
        page_content = news_api.page_parser(page_content)
        api.PastHeader(f, "")
        page_content = api.page_compose(page_content)
        api.PastEntry(f, "", "", ''.join(page_content), "")
        api.PastTail(f)

    logger.info("get one page spend %d sec", time() - time_one_page)
    sys.exit()
Exemple #2
0
}

if __name__ == '__main__':
    # Script entry point: parse the command line, optionally handle the
    # single-page mode, then fetch today's list and prepare the dated
    # output folder.
    time_start = time()
    opt = main_argv_parser(sys.argv)
    # NOTE(review): this rebinds the name `utils` to the object returned by
    # calling utils(), so the original callable is no longer reachable under
    # that name after this line.
    utils = utils()

    # Connection timeout in seconds, applied through setdefaulttimeout().
    timeout = 10
    setdefaulttimeout(timeout)

    # Single-page mode: get_one_page() ends with sys.exit(), so none of the
    # code below runs when opt.page is set.
    if opt.page:
        get_one_page(utils)

    # Fetch the list and log how long that took.
    time_get_list = time()
    news_api = AppleNews(html_r(), 'http://www.appledaily.com.tw/')
    news_api.get_list('appledaily/todayapple')
    time_end_get_list = time() - time_get_list
    logger.info("get list spend %d sec", time_end_get_list)

    api = SaveAppleNewsToHtml(news_api.home_url)

    # Prepare the output folders: <opt.folder>/<YYYY-MM-DD> (local date)
    # plus an img/ subfolder inside it.
    api.store_in = opt.folder + "/" + strftime("%Y-%m-%d", localtime())
    utils.mkdir(opt.folder, False)
    utils.mkdir(api.store_in, False)
    utils.mkdir(api.store_in + '/img', False)

    # Read every line of blacklist.txt from the current working directory.
    # NOTE(review): the file object returned by open() is never explicitly
    # closed here.
    blacklist = open('blacklist.txt', 'r').readlines()
    # Only when blacklist.txt has at least one line: open blacklist.html in
    # the dated folder for writing.
    if blacklist:
        blf = open(api.store_in + '/blacklist.html', 'w')