def mysave(self, tocatid):
    """Collect the current article (self.title/self.content) into category `tocatid`.

    Opens a fresh Mysql connection, skips empty titles and duplicates
    (same title + catid already in v9_news), otherwise delegates the
    insert to self.addnews().
    """
    # NOTE(review): credentials are hard-coded; move to config.
    self.database = Mysql(host="121.199.48.196", user="******", pwd="rajltool321123", db="m_wxhs120_com")
    self.tocatid = tocatid
    # self.sDir = "d:/uploadfile/"#local image directory
    # self.sDir = "/mnt/xvdb1/virtualhost/vmO2xqlA/uploadfile/"#local image directory
    # self.picurl = "http://imgs.najiaoluo.com/"#remote image domain
    # if os.path.exists(self.sDir)==False:
    # os.mkdir(sDir)
    # os.chmod(sDir,0o777) # makedirs should default to 777, unclear why it did not work
    # sName = sDir+str(int(time.time()))+'.txt'
    print('正在采集--' + self.title + '--文章')
    # Round-trip through GBK to drop characters the target charset cannot store.
    title = self.title.encode('gbk', 'ignore').decode('gbk')
    if (title.strip() == ''):
        # Empty title: nothing worth collecting.
        print("标题,不采集!")
        return
    isexist1 = ""
    try:
        # Duplicate check: same title in the same category.
        # NOTE(review): string-built SQL is injectable; parameterize if Mysql supports it.
        sql = "select id from v9_news where title='%s' and catid='%s' order by title desc" % (
            title, self.tocatid)
        # print(sql)
        isexist1 = self.database.ExecQuery(sql)
    except Exception as e:
        print("查询信息出错,错误信息:%s" % (e))
        pass
    if isexist1:
        # Already stored — skip.
        print(title + '-----> 有重复不提交!')
    else:  # no existing record: submit the data
        # pass
        self.addnews()
def stock_telescope(self):
    """Build a Highstock time-series dataset per configured appid.

    Reads DB credentials from <Config_path>/<name>.ini, resolves each
    appid to its display title via h_appmap, pulls (timestamp, value)
    rows from h_data within a configurable time window, and hands the
    resulting dict to self.highstock() for rendering.
    (Python 2 code: print statements, dict.has_key.)
    """
    cf = ConfigParser.ConfigParser()
    cf.read("%s/%s.ini" % (self.Config_path, self.configs['name']))
    host = cf.get("db", "host")
    port = cf.get("db", "port")  # NOTE(review): read but never passed to Mysql() — confirm intended
    user = cf.get("db", "user")
    password = cf.get("db", "pass")
    database = cf.get("db", "database")
    conn = Mysql(
        host = host,
        user = user,
        password = password,
        database = database,
    )
    # 'cid' may be a single appid or several joined with '|'.
    cid = []
    if "|" in self.configs['cid']:
        for i in self.configs['cid'].split('|'):
            cid.append(i.strip())
    else:
        cid.append(self.configs['cid'])
    print cid
    now = int(time.time())
    result = {}
    for appid in cid:
        # Prefer the mapped human-readable title; fall back to the raw appid.
        sql_title = "select title from h_appmap where appid = '%s' limit 1" % appid
        appmap_row = conn.fetch(sql_title)
        if not appmap_row['title']:
            appmap_row['title'] = appid
        line = appmap_row['title']
        print line
        result[line] = []
        # Default window: 14 days (1209600 s).
        rangetime = 1209600
        if self.configs['attr'].has_key('rangetime'):
            if self.configs['attr']['rangetime'] and '|' in self.configs['attr']['rangetime']:
                # Value shaped like "label|<N><unit>", e.g. "foo|2w".
                rangetime = self.configs['attr']['rangetime'].split('|')[1]
                times = re.search("^([0-9]+)([a-z|A-Z]+)", rangetime)
                n, m = int(times.group(1)), times.group(2)
                # Unit to seconds: h=hour, d=day, w=week.
                if m == 'h':
                    m = 3600
                elif m == 'd':
                    m = 86400
                elif m == 'w':
                    m = 604800
                rangetime = n*m
        print rangetime
        sql_data = "select timestamp, value from h_data where appid = '%s' and timestamp > %d order by timestamp" % (appid, now-rangetime)
        data_rows = conn.fetchall(sql_data)
        for row in data_rows:
            # Highstock expects millisecond timestamps.
            result[line].append([int(row['timestamp']*1000), int(row['value'])])
    #print result
    return self.highstock(result, self.configs['id'], self.configs['attr'])
def process_item(self, item, spider):
    """Deduplication pipeline: drop any item whose pixiv_id is already stored."""
    pixiv_id = item['pixiv_id']
    sql = 'select * from illusts where pixiv_id = %(pixiv_id)s'
    hits = Mysql().select(sql, {'pixiv_id': pixiv_id})
    # A positive count means the illustration was scraped before.
    if hits > 0:
        raise DropItem(pixiv_id)
    return item
def save(self, tocatid):
    """Store the current WeChat article into the CMS database.

    Ensures the WeChat account (self.wxh) exists in v9_weixinhao
    (inserting it via self.addwx() if needed), then inserts the article
    via self.addnews() unless the title is empty or already present.

    BUG FIX: the original referenced bare names `sDir` and `database`,
    which are unbound here (NameError at runtime) — the attributes
    `self.sDir` / `self.database` were clearly intended, matching the
    sibling class methods that use `self.database.ExecQuery(...)`.
    """
    # NOTE(review): hard-coded credentials; move to config.
    self.database = Mysql(host="121.41.40.189", user="******", pwd="nMAf6wBCdRstaaabbb", db="najiaoluoabab")
    self.tocatid = tocatid
    self.sDir = "d:/uploadfile/"  # local image directory
    self.picurl = "http://imgs.najiaoluo.com/"  # remote image domain
    if not os.path.exists(self.sDir):  # was: os.path.exists(sDir) == False  (NameError)
        os.mkdir(self.sDir)
    # sName = sDir+str(int(time.time()))+'.txt'
    print('正在采集--' + self.title + '--文章')
    # --- WeChat account bookkeeping ---
    isexist = ""
    self.wxid = 0
    try:
        # NOTE(review): string-built SQL is injectable; parameterize if Mysql supports it.
        isexist = self.database.ExecQuery(  # was: database.ExecQuery (NameError)
            "select id from v9_weixinhao where weixinID='" + self.wxh + "'")
    except Exception as e:
        print(e)
    if isexist:
        print("公众号-----> 有重复不提交!")
        self.wxid = isexist[0][0]
    else:  # insert the account and keep its new id
        self.wxid = self.addwx()
    title = self.title
    if (title.strip() == '' or self.wxid == 0):
        print("标题或微信ID为空,不采集!")
        return
    # --- article duplicate check ---
    isexist1 = ""
    try:
        isexist1 = self.database.ExecQuery(
            "select * from v9_news where title='" + title + "'")
    except Exception as e:
        print(e)
    if isexist1:
        print(title + '-----> 有重复不提交!')
    else:  # no existing record: submit the data
        self.addnews()
def process_item(self, item, spider):
    """Persist a ranked pixiv illustration row into the `illusts` table.

    Maps mode 'daily' -> 1 (anything else -> 0), stamps today's date,
    and inserts via a parameterized query.

    BUG FIX: the original returned None, which silently drops the item
    for every pipeline that runs after this one — Scrapy's contract is
    that process_item returns the item (or raises DropItem).
    """
    illust_mode = 1 if item['mode'] == 'daily' else 0
    date = time.strftime('%Y-%m-%d')
    value = {
        'date': date,
        'mode': illust_mode,
        'path': item['images'][0]['path'],
        'pixiv_id': item['pixiv_id'],
        'title': item['title'],
        'total_score': item['total_score'],
        'author': item['author'],
        'image_urls': item['image_urls'],
        'rank': item['rank'],
        'author_id': item['author_id']
    }
    # NOTE(review): `rank` is a reserved word in MySQL 8.0+; backtick it if upgrading.
    sql = "insert into illusts(pixiv_id, title, total_score, author, image_urls, date, rank, author_id, path, mode) VALUES (%(pixiv_id)s, %(title)s, %(total_score)s, %(author)s, %(image_urls)s, %(date)s, %(rank)s, %(author_id)s, %(path)s, %(mode)s)"
    db = Mysql()
    db.action(sql, value)
    return item
def mysave(self,scatid):
    """Collect the current article into category `scatid` and mirror its images locally.

    Rewrites image src attributes to a local ./img/ folder, inserts the
    article into v9_news / v9_news_data (two-table CMS schema), then
    downloads every referenced image into d:/test/img/.
    """
    # NOTE(review): hard-coded credentials; move to config.
    database=Mysql(host="121.199.48.196", user="******", pwd="rajltool321123", db="test")
    sDir='d:/test/' # image directory
    img_dir = 'img'
    if os.path.exists(sDir)==False:
        os.mkdir(sDir)
    # sName = sDir+str(int(time.time()))+'.txt'
    print('正在采集--'+self.title+'--文章')
    title = self.clearInput(self.title)
    m = self.clearInput(self.content)
    # Batch-rewrite image paths in the old content to the local folder.
    img_patt = re.compile('src=".*?/(\w+\.\w+)"')
    new_m = img_patt.sub(r'src="./%s/\1"'%img_dir,m)
    isexist1=""
    try:
        # NOTE(review): string-built SQL is injectable; parameterize if Mysql supports it.
        isexist1 = database.ExecQuery("select * from v9_news where title='"+title+"'")
    except Exception as e:
        print(e)
        pass
    if isexist1:
        print(title+'-----> 有重复不提交!')
    else:# no existing record: submit the data
        content=new_m
        catid=scatid # target category
        typeid=0
        # Top-6 keywords extracted from the title.
        tags=jieba.analyse.extract_tags(title, 6)
        keywords=(",".join(tags))
        description=self.dom('.art_content').text()[0:200]
        url=''
        listorder=0
        status=99
        username='******'
        inputtime=updatetime=int(time.time())
        insertbooksql ="insert into v9_news (title,catid,typeid,keywords,description,url,listorder,status,username,inputtime,updatetime) VALUES ('" \
            "{title}', {catid}, {typeid}, '{keywords}', '{description}', '{url}', {listorder}, {status}, '{username}', '{inputtime}', '{updatetime}')"
        insert1 = insertbooksql.format(title=title, catid=catid, typeid=typeid, keywords=keywords, description=description,url=url,listorder=listorder,status=status,username=username,inputtime=inputtime,updatetime=updatetime)
        print(insert1)
        try:
            database.ExecNonQuery(insert1)
            # Child row in v9_news_data keyed by the new article id.
            lastid=database.cur.lastrowid
            paginationtype = 2
            groupids_view = ""
            maxcharperpage = 0
            template = ""
            insertbooksql ="insert into v9_news_data (id,content,paginationtype,groupids_view,maxcharperpage,template) VALUES ({lastid}, '{content}', {paginationtype},'{groupids_view}',{maxcharperpage},'{template}')"
            insert2 = insertbooksql.format(lastid=lastid, content=content, paginationtype=paginationtype,groupids_view=groupids_view,maxcharperpage=maxcharperpage,template=template)
            print(insert2)
            database.ExecNonQuery(insert2)
        except Exception as e:
            print("文章数据库保存出错,错误信息:%s" % (e) )
            pass
        # Actually download the images.
        # NOTE(review): original indentation lost — this loop may have run at method
        # level (i.e. even for duplicates); placed inside the insert branch. Confirm.
        img_patt = re.compile('src="(.*?)"')
        img_patt = img_patt.findall(m)
        i =0
        for img in img_patt:
            i+=1
            # image file name
            img_name = os.path.join(img_dir,img.split('/')[-1])
            # fetch the image resource
            if os.path.exists(sDir+img_dir)==False:
                os.mkdir(sDir+img_dir)
            # join the path
            imgpath=os.path.join(sDir,img_name)
            f = open(imgpath, 'wb')
            f.write(requests.get(img, stream=True).content)
            f.close()
id, run_id, project, group, percent, stat = sys.argv[1:] #stat: u(update) ro r(rollback) try: int(percent) except: l = Log(project, group, percent) else: l = Log(project, group, "%s%%" % percent) mysql_conn = Mysql( host = '127.0.0.1', user = '******', password = '******', database = 'autorelease', ) mongo_conn = Mongo( host = '127.0.0.1', database = 'log', table = 'log_'+id, ) mongo_conn.drop() mongo_conn.insert( id = id, run_id = run_id,
class Getshow(object):
    """Scraper for a vccoo.com article page: exposes title/content/WeChat
    metadata as lazy properties and saves everything into a CMS database."""

    def __init__(self, show_id):
        # Parameter is the article id on vccoo.
        self.url = 'http://www.vccoo.com/v/{0}'.format(show_id)
        self._dom = None
        # Cache the fetched html so a spider only requests the page once.

    @property
    def dom(self):
        # Fetch (once) and cache the html content as a PyQuery document.
        if not self._dom:
            document = requests.get(self.url)
            document.encoding = 'utf-8'
            self._dom = Pq(document.text)
        return self._dom

    # article title
    @property
    def title(self):
        # Property so callers can write s.title without the parentheses.
        return self.dom('h1.article-title').text(
        )  # selectors follow css/jquery selector syntax; pyquery supports nearly all of them

    # article content
    @property
    def content(self):
        d = Pq(self.dom('.article-content').html())
        # Strip recommendation/ad widgets before returning.
        d('.main-tg-area').remove()
        d('.articleRecommend').remove()
        return self.clearInput(d.html())
        # return self.dom('.article-content').html()  # raw html — filter later

    # WeChat account logo url
    @property
    def wxlogo(self):
        return self.dom('.author-name img').attr('src')

    # WeChat account id (scraped from the account's listing page on vccoo)
    @property
    def wxh(self):
        wxlmurl = self.dom('.author-name a').attr('href')  # vccoo account listing page
        document = requests.get(wxlmurl)
        document.encoding = 'utf-8'
        dom = Pq(document.text)
        return dom('.publicAccountID').text()

    # WeChat account display name
    @property
    def wxname(self):
        return self.dom('.author-name strong').text()

    # Real WeChat article url, pulled from an inline `var s = "..."` script.
    @property
    def wxurl(self):
        # NOTE(review): .replace("&", "&") is a no-op — likely meant
        # .replace("&amp;", "&") and was mangled by html-unescaping; confirm.
        return re.findall(r'var s = "(.*?)"', self.dom('body').html())[0].replace("&", "&")
        # print(re.findall('<title>(.*?)</title>',"dsflksl<title>sdfsdf中国</title>dsfds")[0])

    # WeChat account QR-code url (built from the biz token in wxurl)
    @property
    def wxer(self):
        biz = self.wxurl.split("biz=")[1].split("&mid=")[0]
        return "http://mp.weixin.qq.com/mp/qrcode?scene=10000004&size=100&__biz=" + biz

    # <meta property="og:image" content="http://mmbiz.qpic.cn/mmbiz_jpg/3oP8LV1kURibv3LAbIkk4v6pXo6xHwZVkqibO0BSdVGicA8JHicKiaJZU3Dpga2ibwa2bEfad5PchdxXSFmxv6WkECEQ/0?wx_fmt=jpeg" />
    # article thumbnail (og:image meta tag)
    @property
    def thumb(self):
        return re.findall(r'<meta property="og:image" content="(.*?)"', self.dom('head').html())[0]

    # publish time: last 10 chars of the author block text
    @property
    def addtime(self):
        return self.dom('.author-name').text(
        )[-10:]  # plain text() then slicing is enough when the text has no embedded spaces/commas

    # Clean scraped html: strip marker comments and vccoo redirect prefixes.
    def clearInput(self, txt):
        txt = txt.replace('<!--main-tg-area-->', '')
        txt = txt.replace('<!-- articleRecommend/ -->', '')
        # txt=txt.replace('vccoo.com/refer.php?url=','')
        # regex replace
        txt = re.sub(r'http:\/\/img\d+\.vccoo\.com\/refer\.php\?url=', '', txt)
        return txt

    # Store the article (and its WeChat account) into the CMS.
    def save(self, tocatid):
        # NOTE(review): hard-coded credentials; move to config.
        self.database = Mysql(host="121.41.40.189", user="******", pwd="nMAf6wBCdRstaaabbb", db="najiaoluoabab")
        self.tocatid = tocatid
        self.sDir = "d:/uploadfile/"  # local image directory
        self.picurl = "http://imgs.najiaoluo.com/"  # remote image domain
        # NOTE(review): BUG — bare `sDir` is unbound (NameError); should be self.sDir.
        if os.path.exists(sDir) == False:
            os.mkdir(sDir)
        # sName = sDir+str(int(time.time()))+'.txt'
        print('正在采集--' + self.title + '--文章')
        # WeChat account bookkeeping
        isexist = ""
        self.wxid = 0
        try:
            # NOTE(review): BUG — bare `database` is unbound (NameError); should be self.database.
            isexist = database.ExecQuery(
                "select id from v9_weixinhao where weixinID='" + self.wxh + "'")
        except Exception as e:
            print(e)
            pass
        if isexist:
            print("公众号-----> 有重复不提交!")
            self.wxid = isexist[0][0]
        else:  # insert the account and keep its new id
            self.wxid = self.addwx()
        title = self.title
        if (title.strip() == '' or self.wxid == 0):
            print("标题或微信ID为空,不采集!")
            return
        isexist1 = ""
        try:
            isexist1 = self.database.ExecQuery(
                "select * from v9_news where title='" + title + "'")
        except Exception as e:
            print(e)
            pass
        if isexist1:
            print(title + '-----> 有重复不提交!')
        else:  # no existing record: submit the data
            self.addnews()

    # Insert the WeChat account into v9_weixinhao / v9_weixinhao_data; returns new id, 0 on failure.
    def addwx(self):
        title = self.wxname
        catid = 10  # target category
        typeid = 0
        tags = jieba.analyse.extract_tags(self.wxname, 3)
        keywords = (",".join(tags))
        description = ''
        url = ''
        listorder = 0
        status = 99
        username = '******'
        inputtime = updatetime = int(time.time())
        insertbooksql = "insert into v9_weixinhao (title,catid,typeid,keywords,description,url,listorder,status,username,inputtime,updatetime) VALUES ( '{title}', {catid}, {typeid}, '{keywords}', '{description}', '{url}', {listorder}, {status}, '{username}', '{inputtime}', '{updatetime}')"
        insert1 = insertbooksql.format(title=title, catid=catid, typeid=typeid, keywords=keywords, description=description, url=url, listorder=listorder, status=status, username=username, inputtime=inputtime, updatetime=updatetime)
        print(insert1)
        try:
            self.database.cur.execute(insert1)
            # child table
            lastid = self.database.cur.lastrowid
            # NOTE(review): BUG — assigned as `fenleiid` but .format() below passes
            # `fenliid=fenliid`, an unbound name (NameError inside the try, so it
            # silently triggers the rollback branch).
            fenleiid = self.tocatid
            weixinID = self.wxh
            gnjs = ''
            wxrz = ''
            ndir = time.strftime("%Y/%m%d/")
            wxlogo = self.getimg(self.wxlogo, weixinID + "_logo.png", self.sDir + ndir, self.picurl + ndir)  # download image
            wxepic = self.getimg(self.wxer, weixinID + ".png", self.sDir + ndir, self.picurl + ndir)
            content = ''
            paginationtype = 2
            groupids_view = ""
            maxcharperpage = 0
            template = ""
            insertbooksql = "insert into v9_weixinhao_data (id,fenliid,weixinID,gnjs,wxrz,wxlogo,wxepic,content,paginationtype,groupids_view,maxcharperpage,template) VALUES ({lastid},{fenliid},{weixinID},'{gnjs}','{wxrz}','{wxlogo}','{wxepic}','{content}', {paginationtype},'{groupids_view}',{maxcharperpage},'{template}')"
            insert2 = insertbooksql.format(lastid=lastid, fenliid=fenliid, weixinID=weixinID, gnjs=gnjs, wxrz=wxrz, wxlogo=wxlogo, wxepic=wxepic, content=content, paginationtype=paginationtype, groupids_view=groupids_view, maxcharperpage=maxcharperpage, template=template)
            print(insert2)
            self.database.cur.execute(insert2)
            # database.cur.close()
            self.database.conn.commit()
            return self.database.cur.lastrowid
            # NOTE(review): unreachable — follows a return.
            print('公众号入库成功!')
        except Exception as e:
            print("公众号数据库保存出错,错误信息:%s" % (e))
            # database.conn.close()
            self.database.conn.rollback()
            return 0

    # Insert the article into v9_news / v9_news_data (two-table CMS schema).
    def addnews(self):
        # Batch-rewrite image paths in the old content:
        # img_patt = re.compile('src=".*?/(\w+\.\w+)"')
        # new_m = img_patt.sub(r'src="./%s/\1"'%img_dir,m)
        title = self.title
        content = self.database.conn.escape(
            self.content)  # escape here; the SQL below omits quotes because escape() adds them
        catid = self.tocatid  # target category
        wxid = self.wxid
        ndir = time.strftime("%Y/%m%d/")
        thumb = self.getimg(self.thumb, self.random_str(6) + ".jpg",
                            self.sDir + "thumb/" + ndir, self.picurl + "thumb/" + ndir)  # download image
        typeid = 0
        tags = jieba.analyse.extract_tags(self.title, 6)
        keywords = (",".join(tags))
        description = Pq(self.content).text()[0:200]
        url = ''
        listorder = 0
        status = 99
        username = '******'
        inputtime = updatetime = int(time.time())
        insertbooksql = "insert into v9_news (title,catid,wxid,thumb,typeid,keywords,description,url,listorder,status,username,inputtime,updatetime) VALUES ( '{title}',{catid},{wxid}, '{thumb}',{typeid}, '{keywords}', '{description}', '{url}',{listorder},{status}, '{username}', '{inputtime}', '{updatetime}')"
        insert1 = insertbooksql.format(title=title, catid=catid, wxid=wxid, thumb=thumb, typeid=typeid, keywords=keywords, description=description, url=url, listorder=listorder, status=status, username=username, inputtime=inputtime, updatetime=updatetime)
        print(insert1)
        try:
            # Transaction: both inserts commit together or roll back together.
            self.database.cur.execute(insert1)
            lastid = self.database.cur.lastrowid
            paginationtype = 2
            groupids_view = ""
            maxcharperpage = 0
            template = ""
            insertbooksql = "insert into v9_news_data (id,content,paginationtype,groupids_view,maxcharperpage,template) VALUES ({lastid}, {content}, {paginationtype},'{groupids_view}',{maxcharperpage},'{template}')"
            insert2 = insertbooksql.format(lastid=lastid, content=content, paginationtype=paginationtype, groupids_view=groupids_view, maxcharperpage=maxcharperpage, template=template)
            print(insert2)
            self.database.cur.execute(insert2)
            # database.cur.close()
            self.database.conn.commit()
            print('文章入库成功!')
        except Exception as e:
            print("文章数据库保存出错,错误信息:%s" % (e))
            # database.conn.close()
            self.database.conn.rollback()

    # Fetch a remote image, save it locally, and return its public url.
    # imgUrl: remote image  filename: saved name  tourl: local dir  neturl: public url prefix
    def getimg(self, imgUrl, filename, tourl, neturl):
        if filename:
            local_filename = filename
        else:
            local_filename = imgUrl.split('/')[-1]
        print("Download Image File=", local_filename)
        if os.path.exists(tourl) == False:
            os.makedirs(tourl)
        # Cookie-based login simulation, if needed:
        # headers = {
        #     "Host": "techinfo.subaru.com",
        #     "User-Agent": "lol",
        #     "Cookie": "JSESSIONID=F3CB4654BFC47A6A8E9A1859F0445123"
        # }
        # r = requests.get(url, stream=True, headers=headers)
        r = requests.get(
            imgUrl, stream=True)  # here we need to set stream = True parameter
        with open(tourl + local_filename, 'wb') as f:
            try:
                for chunk in r.iter_content(
                        chunk_size=1024):  # chunked download; 1024 is arbitrary
                    if chunk:  # filter out keep-alive new chunks
                        f.write(chunk)
                        f.flush()  # flush so the file fills incrementally
                f.close()
            except Exception as e:
                print("图片下载出错")
                f.close()
                return
        return neturl + local_filename

    # Generate a random string of `num` characters (digits mixed with A-Z).
    def random_str(self, num):
        li = []
        for i in range(int(num)):
            r = random.randrange(0, 5)
            if i == r:
                # NOTE(review): reuses/overwrites the `num` parameter — works, but fragile.
                num = random.randrange(0, 10)
                li.append(str(num))
            else:
                temp = random.randrange(65, 91)
                c = chr(temp)
                li.append(c)
        result = "".join(li)
        return result
def main(id, run_id, project, group, percent, stat):
    """
    Deploy-watch daemon loop: seed a Mongo log record for task `id`, then
    poll the controltier queue; while run_id is still queued keep pushing
    log content to Mongo, and once it leaves the queue mark the task
    finished (stat 'u' -> status 102 + History row, 'r' -> status 105 +
    Rollback bookkeeping) and exit.
    """
    import time
    mysql_conn = Mysql(
        host='127.0.0.1',
        user='******',
        password='******',
        database='autorelease',
    )
    # One Mongo collection per task id.
    mongo_conn = Mongo(
        host='127.0.0.1',
        database='log',
        table='log_' + id,
    )
    # stat: u(update) or r(rollback)
    # If `percent` is not numeric it is used verbatim; otherwise formatted as "N%".
    try:
        int(percent)
    except:
        l = Log(project, group, percent)
    else:
        l = Log(project, group, "%s%%" % percent)
    content = l.load()
    # Start from a clean collection, then insert the initial record.
    mongo_conn.drop()
    mongo_conn.insert(
        id=id,
        run_id=run_id,
        content="",
        update_time=time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(int(time.time())))
    )
    while True:
        content = l.load()
        # Ask controltier whether run_id is still queued.
        result = os.popen("/usr/local/ctier/pkgs/ctl-3.6.1/bin/ctl-queue").read()
        if run_id in result:
            # Still running: refresh the log content and poll again.
            mongo_conn.update(condition={'id': id}, data={"content": content})
            continue
        else:
            # Finished: final log push, then record completion.
            mongo_conn.update(condition={'id': id}, data={"content": content})
            if stat == 'u':
                mysql_conn.save(
                    "update task_content set finish_time = '%s', status = '102' where id = '%s'" % (int(time.time()), id))
                time.sleep(10)
                send("", id, project, 102)
                # history
                content_result = Content.objects.filter(id=id, project=project, env=percent).order_by('-finish_time').values()[0]
                h = History(
                    task_id=content_result['id'],
                    type=content_result['type'],
                    project=content_result['project'],
                    env=content_result['env'],
                    run_id=content_result['run_id'],
                    version=content_result['version'],
                    status=content_result['status'],
                    deploy_time=content_result['deploy_time'],
                    finish_time=content_result['finish_time'],
                    create_user=content_result['create_user'],
                    deploy_user=content_result['deploy_user'],
                )
                h.save()
            elif stat == 'r':
                mysql_conn.save(
                    "update task_content set finish_time = '%s', status = '105' where id = '%s'" % (int(time.time()), id))
                time.sleep(10)
                rollback_history_id = Rollback.objects.filter(task_id=id).order_by('-start_time').values()[0]['id']
                Rollback.objects.filter(id=rollback_history_id, task_id=id).update(finish_time=int(time.time()))
                send("", id,
                     project, 105)
            # NOTE(review): original indentation lost — exit() is assumed to run
            # once the run leaves the queue, regardless of stat; confirm.
            exit()
        time.sleep(10)
#encoding=utf-8 __author__ = 'jophyyao' import sys, os, time, re sys.path.insert(0, '../') from controltier.node import Node from tools.mysql import Mysql mysql_conn = Mysql( host = '127.0.0.1', user = '******', password = '******', database = 'autorelease', ) now = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(int(time.time()))) for project in os.listdir('/usr/local/ctier/ctl/projects'): n = Node(project) for k, v in n.analysis().iteritems(): result = mysql_conn.fetch("select count(id) from project_node where project = '%s' and hostname = '%s'" % (project, v['hostname'])) if result['count(id)']: mysql_conn.save(""" update project_node set name = '{name}', description = '{description}', tags = '{tags}', ctlusername = '******', osfamily = '{osfamily}', osname = '{osname}',
class Getshow(object):
    """Scraper for a generic article url: title/content via lazy PyQuery
    properties, content rewriting for the target site, and CMS storage."""

    def __init__(self, url):
        # Parameter is the page url.
        self.url = url
        self._dom = None
        # Cache the fetched html so a spider only requests the page once.

    @property
    def dom(self):
        # Fetch (once) and cache the html content as a PyQuery document.
        if not self._dom:
            document = requests.get(self.url)
            document.encoding = 'utf-8'
            self._dom = Pq(document.text)
        return self._dom

    # page title
    @property
    def title(self):
        # Property so callers can write s.title without the parentheses.
        return self.clearInput(self.dom('title').text(
        ))  # selectors follow css/jquery selector syntax; pyquery supports nearly all of them

    # article content
    @property
    def content(self):
        return self.clearInput(self.dom('.nr').html())  # raw html — filtered by clearInput

    # Collect the article into category `tocatid` (dedup by title+catid).
    def mysave(self, tocatid):
        # NOTE(review): hard-coded credentials; move to config.
        self.database = Mysql(host="121.199.48.196", user="******", pwd="rajltool321123", db="m_wxhs120_com")
        self.tocatid = tocatid
        # self.sDir = "d:/uploadfile/"#local image directory
        # self.sDir = "/mnt/xvdb1/virtualhost/vmO2xqlA/uploadfile/"#local image directory
        # self.picurl = "http://imgs.najiaoluo.com/"#remote image domain
        # if os.path.exists(self.sDir)==False:
        # os.mkdir(sDir)
        # os.chmod(sDir,0o777) # makedirs should default to 777, unclear why it did not work
        # sName = sDir+str(int(time.time()))+'.txt'
        print('正在采集--' + self.title + '--文章')
        # Round-trip through GBK to drop characters the target charset cannot store.
        title = self.title.encode('gbk', 'ignore').decode('gbk')
        if (title.strip() == ''):
            print("标题,不采集!")
            return
        isexist1 = ""
        try:
            # NOTE(review): string-built SQL is injectable; parameterize if Mysql supports it.
            sql = "select id from v9_news where title='%s' and catid='%s' order by title desc" % (
                title, self.tocatid)
            # print(sql)
            isexist1 = self.database.ExecQuery(sql)
        except Exception as e:
            print("查询信息出错,错误信息:%s" % (e))
            pass
        if isexist1:
            print(title + '-----> 有重复不提交!')
        else:  # no existing record: submit the data
            # pass
            self.addnews()

    # Insert the article into v9_news / v9_news_data plus hits counter and url update.
    def addnews(self):
        # Batch-rewrite image paths in the old content (disabled).
        title = self.title.encode('gbk', 'ignore').decode('gbk')
        # content=(self.content)
        content = self.database.conn.escape(
            self.content.encode(
                'gbk', 'ignore').decode('gbk'))  # escape() quotes the value itself; SQL below omits quotes
        catid = self.tocatid  # target category
        # weixinid=str(self.wxid)
        ndir = time.strftime("%Y/%m%d/")
        # nthumb="http://img03.sogoucdn.com/net/a/04/link?appid=100520034&url="+self.thumb  # resize big images to 512 wide; id 100520034->300, 100520031->121
        # thumb=self.getimg(nthumb,self.random_str(6)+".jpg",self.sDir+"thumb/"+ndir,self.picurl+"thumb/"+ndir)  # download image
        thumb = ""
        typeid = 0
        tags = jieba.analyse.extract_tags(title, 6)
        keywords = (",".join(tags))
        description = Pq(self.content).text()[0:180].encode(
            'gbk', 'ignore').decode('gbk')
        url = ''
        listorder = 0
        status = 99
        username = '******'
        inputtime = updatetime = int(time.time())
        insertbooksql = "insert into v9_news (title,catid,thumb,typeid,keywords,description,url,listorder,status,username,inputtime,updatetime) VALUES ( '{title}',{catid}, '{thumb}',{typeid}, '{keywords}', '{description}', '{url}',{listorder},{status}, '{username}', {inputtime}, {updatetime})"
        insert1 = insertbooksql.format(title=title, catid=catid, thumb=thumb, typeid=typeid, keywords=keywords, description=description, url=url, listorder=listorder, status=status, username=username, inputtime=inputtime, updatetime=updatetime)
        # print(insert1)
        try:
            # Transaction: all four statements commit together or roll back together.
            self.database.cur.execute(insert1)
            lastid = self.database.cur.lastrowid
            paginationtype = 2
            groupids_view = ""
            maxcharperpage = 0
            template = ""
            insertbooksql = "insert into v9_news_data (id,content,paginationtype,groupids_view,maxcharperpage,template) VALUES ({lastid}, {content}, {paginationtype},'{groupids_view}',{maxcharperpage},'{template}')"
            insert2 = insertbooksql.format(lastid=lastid, content=content, paginationtype=paginationtype, groupids_view=groupids_view, maxcharperpage=maxcharperpage, template=template)
            # print(insert2)
            self.database.cur.execute(insert2)
            # hits table: here modelid=12; article modelid=1
            hitsid = "c-1-" + str(lastid)
            insertsql = "INSERT INTO `v9_hits`(`hitsid`,`catid`,`updatetime`) VALUES ('{hitsid}',{catid},{updatetime}) "
            insert3 = insertsql.format(hitsid=hitsid, catid=catid, updatetime=updatetime)
            # print(insert3)
            self.database.cur.execute(insert3)
            # Build the article url from the category url prefix + id.
            sql = "select url from v9_category where catid=" + str(
                catid) + " order by catid desc"
            isurl = self.database.ExecQuery(sql)
            # print(isurl)
            # update the main article row's url
            url = str(isurl[0][0]) + str(lastid) + ".html"
            # # print(url)
            insertsql = "update `v9_news` set url='{url}' where id = {lastid} order by id desc"
            insert4 = insertsql.format(url=url, lastid=lastid)
            # print(insert4)
            self.database.cur.execute(insert4)
            # database.cur.close()
            self.database.conn.commit()
            print('文章%s入库成功!' % title)
        except Exception as e:
            print("文章%s数据库保存出错,错误信息:%s" % (title, e))
            # database.conn.close()
            self.database.conn.rollback()
        # with open(sName,'wb') as file:
        #     file.write(new_m.encode())
        #     file.close()

    # Site-specific content laundering: rewrites names, phone numbers,
    # addresses and promo copy of the scraped hospital site into the target
    # site's branding, then swaps every <img> for a fixed promo link.
    def clearInput(self, txt):
        txt = txt.replace('白求恩医学基金定点:', '')
        txt = txt.replace('连续10年荣获国家A级医院:', '')
        txt = txt.replace('被评为国家示范妇科科研基地、国家妇科疾病重点诊疗基地,更是连续10年被评为"A级妇科医院', '')
        txt = txt.replace('丽水市囿山路568号(民政局旁)', '无锡市锡山区东亭二泉东路195号')
        txt = txt.replace('丽水慈爱医院', '无锡华山医院')
        txt = txt.replace('丽水', '无锡')
        txt = txt.replace('慈爱', '华山')
        # NOTE(review): no-op replace (identical operands).
        txt = txt.replace('湖南', '湖南')
        txt = txt.replace('0578-2292111', '0510-88200585')
        txt = txt.replace('05782292111', '051088200585')
        txt = txt.replace('0578-23292111', '0510-88200585')
        txt = txt.replace('057823292111', '051088200585')
        txt = txt.replace('972963352', '493709817')
        txt = txt.replace('预约68元妇科检查套餐', '预约0元妇科检查套餐')
        txt = txt.replace('专家', '医生')
        txt = txt.replace('24年', '')
        txt = txt.replace('非营利性', '专业')
        txt = txt.replace('着名', '专业')
        txt = txt.replace('白求恩医学基金无锡唯一定点医院', '瑞安专业医院')
        txt = txt.replace('世界', '')
        # NOTE(review): duplicate of an earlier replace.
        txt = txt.replace('白求恩医学基金无锡唯一定点医院', '瑞安专业医院')
        txt = txt.replace('德国蓝氧净疗杀菌技术', '华山妇科炎症治疗技术')
        txt = txt.replace('德国O3蓝氧净疗技术', '华山妇科炎症治疗技术')
        txt = txt.replace('权威', '专业')
        txt = txt.replace('汪爱云,女,1949年生,从事妇科临床、教学工作四十余年,并多次在国内着名的三甲医院研究深造。', '从事妇科临床、教学工作二十余年')
        txt = txt.replace('临床经验超过40年', '临床经验超过20年')
        txt = txt.replace('汪爱云主任', '李医生')
        txt = txt.replace('王爱云主任', '李医生')
        txt = txt.replace('汪爱云', '李医生')
        txt = txt.replace('王爱云', '李医生')
        txt = txt.replace('陈汉娇', '李医生')
        txt = txt.replace('陈向宇', '李医生')
        txt = txt.replace('楼美丽', '李医生')
        txt = txt.replace('68元妇科六项套餐 关爱健康从体检开始', '0元妇科检查套餐 关爱健康从检查开始')
        txt = txt.replace('68元六大项妇科检查', '0元妇科检查套餐')
        txt = txt.replace('68元', '0元')
        txt = txt.replace('熊国伟', '曹医生')
        txt = txt.replace('董广胜', '曹医生')
        txt = txt.replace('李涛', '曹医生')
        txt = txt.replace('王益鑫', '曹医生')
        txt = txt.replace('包皮环切术只需580元是吗', '华山包皮环切术有优惠哦')
        txt = txt.replace('包皮环切术', '华山包皮环切术')
        txt = txt.replace('副主任医师/博士后', '')
        txt = txt.replace(
            '男,泌尿外科副主任医师,医学博士后。在国内较早开展前列腺癌表观遗传、微小RNA的研究,现为上海泌尿男科学会青年会员。', '')
        txt = txt.replace('副主任医师/博士后', '')
        txt = txt.replace('上海同济医院泌尿外科', '泌尿外科')
        txt = txt.replace('副教授', '')
        txt = txt.replace('博士后', '')
        txt = txt.replace('公立甲等', '')
        txt = txt.replace('沪浙', '')
        txt = txt.replace('著名', '')
        # NOTE(review): duplicate replaces below are redundant after the first.
        txt = txt.replace('沪浙', '')
        txt = txt.replace('沪浙', '')
        txt = txt.replace('李医生、李医生、李医生主任', '李医生')
        txt = txt.replace('白求恩基金会携手', '')
        txt = txt.replace('白求恩基金会', '')
        txt = txt.replace('40年', '20年')
        txt = txt.replace(
            ',被评为国家示范妇科科研基地、国家科学技术进步奖二等奖,不孕不育重点诊疗基地、全国十佳妇科医院,更是连续10年被评为国家A级妇科医院', '')
        txt = txt.replace('阴茎背神经选择性切断术', '华山早泄治疗术')
        txt = txt.replace('阴茎助勃器植入术', '华山阳痿治疗术')
        txt = txt.replace('检查价格仅需30元', '常规检查价格0元')
        txt = txt.replace('30元', '0元')
        # regex replace
        # text=re.sub('\[[0-9]*\]','',text)
        # txt=re.sub(r"<img[^>]+src\s*=(\s*)['\"]([^'\"]+)['\"][^>]*>","<a href=\"/swt\" rel=\"nofollow\"><img src=\"\\2\" /></a>",txt)
        txt = re.sub(
            r"<img[^>]+src\s*=(\s*)['\"]([^'\"]+)['\"][^>]*>",
            "<a href=\"/swt\" rel=\"nofollow\"><img src=\"http://m.wxhs120.com/uploadfile/2015/1010/20151010085553617.gif\" /></a>",
            txt)
        return txt