def get_one_page(utils):
    """Download the single page named by ``opt.page``, save it as HTML, and exit.

    The rendered page is written to ``one_page/apple.html``; an ``img``
    sub-folder is created next to it. The URL is read from the module-level
    ``opt`` object rather than from a parameter.

    Args:
        utils: Helper object; this function calls its ``mkdir(path, flag)``
            and ``GetPage(url)`` methods.

    Raises:
        SystemExit: Always. The function ends the process through
            ``sys.exit()`` once the page has been written, so it never
            returns to its caller.
    """
    time_one_page = time()

    news_api = AppleNews(html_r(), 'http://www.appledaily.com.tw/')
    api = SaveAppleNewsToHtml(news_api.home_url)
    api.store_in = "one_page/"

    utils.mkdir(api.store_in, True)
    utils.mkdir(api.store_in + '/img', True)

    # Use a context manager so the output file is flushed and closed even if
    # fetching, parsing or composing the page raises. The file is still opened
    # before the page is fetched, matching the original order of side effects.
    with open(api.store_in + 'apple.html', 'w') as f:
        page_content = utils.GetPage(opt.page)
        page_content = news_api.page_parser(page_content)

        api.PastHeader(f, "")
        page_content = api.page_compose(page_content)
        # page_compose's result is joined into one string for the entry body.
        api.PastEntry(f, "", "", ''.join(page_content), "")
        api.PastTail(f)

    logger.info("get one page spend %d sec", time() - time_one_page)
    # Single-page mode stops here instead of returning to the caller.
    sys.exit()
} if __name__ == '__main__': time_start = time() opt = main_argv_parser(sys.argv) utils = utils() # some connection timeout in seconds timeout = 10 setdefaulttimeout(timeout) if opt.page: get_one_page(utils) time_get_list = time() news_api = AppleNews(html_r(), 'http://www.appledaily.com.tw/') news_api.get_list('appledaily/todayapple') time_end_get_list = time() - time_get_list logger.info("get list spend %d sec", time_end_get_list) api = SaveAppleNewsToHtml(news_api.home_url) #Folder Prepare api.store_in = opt.folder + "/" + strftime("%Y-%m-%d", localtime()) utils.mkdir(opt.folder, False) utils.mkdir(api.store_in, False) utils.mkdir(api.store_in + '/img', False) blacklist = open('blacklist.txt', 'r').readlines() if blacklist: blf = open(api.store_in + '/blacklist.html', 'w')