def getImages(self, url, title):
    """Download the centred images of a page and store them as one topic.

    The page at ``url`` is fetched with ``self.get`` and parsed with
    BeautifulSoup. Every ``<p style="text-align:center">`` that wraps an
    ``<img>`` with a ``src`` is treated as one image. A ``lab_topic`` row is
    inserted for ``title``, each image is handed to ``self.downloadImage``
    and a single multi-row INSERT writes the ``lab_image`` rows.

    Args:
        url: Address of the page to scrape.
        title: Title stored on the new topic row.

    Returns:
        Always ``None``. Any exception is printed and swallowed, so this is
        a best-effort operation.
    """
    try:
        response = self.get(url)
        soup = BeautifulSoup(response.text, 'html.parser')
        paragraphs = soup.find_all('p', style='text-align:center')

        # Only keep paragraphs that actually wrap an <img> with a source, so
        # a stray centred paragraph cannot abort the run after the topic row
        # has already been written.
        images = [
            p.img for p in paragraphs
            if p.img is not None and p.img.get('src')
        ]
        if not images:
            return

        now = int(time.time())
        date = time.strftime('%Y%m%d', time.localtime(now))
        download_dir = os.path.join(self.downloadPath, date)
        # `\d+` (not `\d*`): an empty capture would make int('') raise.
        id_pattern = re.compile(r'id=(\d+)\.', re.I)

        # Create the topic.
        # Single quotes are doubled, the same escaping createNews applies.
        # NOTE(review): prefer bound parameters if the Mysql helper supports
        # them; quote doubling does not cover backslashes.
        topic_id = Mysql.create(
            "INSERT INTO lab_topic (title, create_time, update_time) VALUES ('%s', %s, %s)"
            % (str(title).replace("'", "''"), now, now)
        )

        rows = []
        for img in images:
            src = img['src']
            alt = img.get('alt', '')
            match = id_pattern.search(alt)
            p_id = int(match.group(1)) if match else 0

            # File extension taken from the image URL.
            ext = os.path.splitext(src)[1]
            # The stored file name is the MD5 of date + millisecond
            # timestamp + alt text, keeping the original extension.
            seed = date + str(round(time.time() * 1000)) + alt
            name = hashlib.md5(seed.encode(encoding='UTF-8')).hexdigest() + ext
            self.downloadImage(src, download_dir, name)

            save_path = date + '/' + name
            rows.append("('%s', %s, '%s', %s, %s, %s)" % (
                save_path.replace("'", "''"),
                topic_id,
                alt.replace("'", "''"),
                p_id,
                now,
                now,
            ))

        create_sql = (
            'INSERT INTO lab_image (url, topic_id, name, p_id, create_time, update_time) VALUES '
            + ','.join(rows)
        )
        Mysql.execute(create_sql)
        print('\033[1;32m--------------------已创建:', title, '\033[0m')
    except Exception as e:
        # TODO: log
        print(e)
    return
def createNews(self, url):
    """Scrape the first listed article from a page and insert it into ``news``.

    The page at ``url`` is fetched with ``self.get``; the first
    ``<a class="dec_img">`` supplies the article link, cover image and title,
    and its grandparent element supplies the intro text and category label.
    The article body comes from ``self.getNewsView`` and the category id from
    ``self.getCatalog``.

    Args:
        url: Address of the listing page to scrape.

    Returns:
        ``-2`` when the article link equals ``self.oldUrl`` or the category
        label is '美图' (nothing is inserted); ``-1`` when any exception
        occurs; otherwise a JSON object string of the form
        ``{ "id": <new row id>, "url": "<article url>"}``.
    """
    import json  # local import: only used to quote the URL in the result

    try:
        response = self.get(url)
        soup = BeautifulSoup(response.text, 'html.parser')

        # Link to the related article.
        link = soup.find('a', class_='dec_img')
        viewUrl = str(link['href'])
        if self.oldUrl == viewUrl:
            return -2

        # Text values are embedded in a string-built INSERT, so single quotes
        # are doubled. The cover URL gets the same treatment as the title
        # and intro.
        image = str(link.img['src']).replace("'", "''")
        subject = str(link['title']).replace("'", "''")

        container = link.parent.parent
        intro = container.find('p', class_='com_about').get_text().replace("'", "''")
        catalogName = container.find('span', class_='bq_ico').get_text()
        if catalogName == '美图':
            # Articles in this category are not collected.
            return -2

        category_id = self.getCatalog(catalogName)
        # NOTE(review): content is interpolated as returned by getNewsView;
        # confirm that helper escapes single quotes, otherwise the INSERT
        # breaks on any quote in the article body.
        content = self.getNewsView(viewUrl)

        now = int(time.time())
        user_id = 1
        author = '网络'
        sql = "INSERT INTO news (user_id, author, catalog_id, title, intro, content, cover, ctime, utime) VALUES (%s, '%s', %s, '%s', '%s', '%s', '%s', %s, %s)" % (
            user_id, author, category_id, subject, intro, content, image, now, now)
        newsID = Mysql.create(sql)

        # json.dumps quotes the URL so a '"' or '\' in it cannot produce
        # malformed output; for ordinary URLs the result is byte-identical
        # to plain concatenation.
        return (
            '{ "id": ' + str(newsID)
            + ', "url": ' + json.dumps(viewUrl, ensure_ascii=False)
            + '}'
        )
    except Exception:
        # TODO: log
        # Best effort: every failure is reported to the caller as -1.
        return -1