def _store_download_links(link, logger):
    """Resolve one detail-page link and persist its download links.

    Calls ``get_downlink(link)`` and forwards the result to ``send_mysql``
    only when both the name and the download links are non-empty, then
    pauses for half a second before the caller moves on to the next link.
    """
    name, download_links = get_downlink(link)
    # Skip empty results so blank rows are never handed to send_mysql.
    if name != '' and download_links != '':
        send_mysql(name, download_links, logger)
    # Throttle after every link, whether or not anything was stored.
    time.sleep(0.5)


def main():
    """Interactively crawl one category of the meijutt.com listing.

    Prints a banner and the category menu, prompts for a category code and
    requests the first listing page of that category. When ``openurl()``
    reports status 200, the total page count is read from the pagination
    element and every link found on every listing page (first through last,
    inclusive) is processed. Any other status prints an error line naming
    the start URL. ``Done.`` is printed in both cases.
    """
    print("欢迎使用 美剧天堂 爬取脚本")
    print("=" * 20)
    print("魔幻/科幻:1\n灵异/惊悚:2\n都市/感情:3\n犯罪/历史:4\n选秀/综艺:5\n动漫/卡通:6")
    print("=" * 20)
    ftype = input('请输入需要爬取的类型的代号:')
    start_url = "http://www.meijutt.com/file/list%s.html" % ftype
    # NOTE(review): the second argument looks like the page encoding --
    # confirm against openurl.OpenUrl.
    ourl = openurl.OpenUrl(start_url, 'gb2312')
    code, doc = ourl.openurl()
    mylog = Logger(
        os.path.join(os.path.abspath(os.path.curdir), 'misc/spider_log.yaml'))
    logger = mylog.outputLog()

    if code == 200:
        selecter = etree.HTML(doc)
        # The first whitespace-separated token of the pagination text is
        # split on '/' and its second part is used as the page count
        # (presumably "current/total" -- verify against the live markup).
        pages = selecter.xpath(
            "//div[@class='page']/span/text()")[0].split()[0].split('/')[1]
        last_page = int(pages)

        # Page 1 was already fetched above, so reuse its parsed document.
        firstpage_links = selecter.xpath("//a[@class='B font_14']/@href")
        for firstpage_link in firstpage_links:
            _store_download_links(firstpage_link, logger)

        # range() excludes its stop value, so add 1 to include the last page.
        for page in range(2, last_page + 1):
            page_url = 'http://www.meijutt.com/file/list%s_%s.html' % (ftype, page)
            for link in page_link(page_url):
                _store_download_links(link, logger)
    else:
        print("[%s] error..." % start_url)
    print("Done.")
def main():
    """Prompt for a year, collect its links and download them.

    Builds a ``Logger`` from the ``misc/spider_log.yaml`` path under the
    current directory, asks the user for a year, passes that input to
    ``get_links`` and hands the result, together with the logger, to
    ``downurl``.
    """
    base_dir = os.path.abspath(os.path.curdir)
    log_config = os.path.join(base_dir, 'misc/spider_log.yaml')
    logger = Logger(log_config).outputLog()

    year = input("请输入年份:")
    downurl(get_links(year), logger)
def __init__(self, ftype):
    """Store the category code and set up the redis link and logger.

    Args:
        ftype: value kept on the instance as the private ``__ftype``
            attribute.
    """
    self.__ftype = ftype
    # The redis link is created before the logger, as in the original order.
    self.__redis_link = self.__redis_connect()

    base_dir = os.path.abspath(os.path.curdir)
    log_config = os.path.join(base_dir, 'misc/spider_log.yaml')
    self.__logger = Logger(log_config).outputLog()
def __init__(self):
    """Set up the redis link, logger, MySQL helper and the site root URL.

    Both helper objects receive a path under ``misc/`` resolved against the
    current directory: ``spider_log.yaml`` for ``Logger`` and
    ``mysql_data.yaml`` for ``mysql_connect.MysqlConnect``.
    """
    self.__redis_link = self.__redis_connect()

    log_config = os.path.join(
        os.path.abspath(os.path.curdir), 'misc/spider_log.yaml')
    log_factory = Logger(log_config)
    self.__logger = log_factory.outputLog()

    # The working directory is resolved again here, as the original did.
    mysql_config = os.path.join(
        os.path.abspath(os.path.curdir), 'misc/mysql_data.yaml')
    self.mysql_connect = mysql_connect.MysqlConnect(mysql_config)

    self.main_url = 'http://www.hanfan.cc/'
def main():
    """Run the spider and forward every item it yields to ``send_mysql``.

    The logger is built first from the ``misc/spider_log.yaml`` path under
    the current directory; ``spiderman()`` is then called once and each of
    its items is passed to ``send_mysql`` together with that logger.
    """
    base_dir = os.path.abspath(os.path.curdir)
    log_config = os.path.join(base_dir, 'misc/spider_log.yaml')
    logger = Logger(log_config).outputLog()

    for record in spiderman():
        send_mysql(record, logger)