def InsData(self, url, newskind, DateDif):
    """Scrape one aokewang (澳客网) news detail page and insert it into tbl_news_info.

    url      -- absolute URL of the news detail page
    newskind -- lottery kind key: 1=双色球, 2=福彩3D, 3=七彩乐
    DateDif  -- keep the article only if it was published exactly DateDif
                days ago; -1 disables the date filter entirely
    """
    res = Func.GetHeader(url)
    if res == '':
        print('Time Out!')
        return
    soup = bs(res, 'html.parser')
    title = soup.find('h2', attrs={'class': 'news_title'}).text
    timestr = soup.find('span', attrs={'class': 'gray9 mgl10'}).text
    newstime = datetime.datetime.strptime(timestr, '%Y-%m-%d %H:%M')
    # This site exposes no source/author/comment-count fields; store blanks.
    source = ''
    cmt = 0
    author = ''
    newstxt = soup.find('div', attrs={'class': 'newsDetail_txt'}).text.strip()
    # Date filter: compare the article date against (today - DateDif days).
    now = datetime.datetime.now() - datetime.timedelta(days=DateDif)
    same_day = (now.year == newstime.year and now.month == newstime.month
                and now.day == newstime.day)
    if not (same_day or DateDif == -1):
        return
    dictypename = {1: '双色球', 2: '福彩3D', 3: "七彩乐"}
    # Parameterized INSERT. The original interpolated scraped title/body text
    # straight into the SQL string, which broke on embedded quotes and was an
    # SQL-injection vector; the DB driver now handles all escaping.
    sql = ('insert into tbl_news_info(title,newstype,source,pubtime,typename,'
           'webname,newsurl,contenttxt,joincount,author) '
           'values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)')
    conn = MySQLdb.connect(host='.', port=3306, user='******',
                           passwd='123456', db='news_info', charset='utf8')
    try:
        cur = conn.cursor()
        try:
            cur.execute(sql, (title, '其它', source, newstime,
                              dictypename[newskind], '澳客网', url,
                              newstxt, cmt, author))
            conn.commit()
        finally:
            cur.close()
    except Exception as e:
        # Best-effort logging, matching the file's print-based diagnostics.
        print('repr(e):\t%r' % (e,))
        print('traceback.format_exc():\n%s' % traceback.format_exc())
        print('url:%s' % url)
        print('########################################################')
    finally:
        # Always release the connection (the original leaked it on error).
        conn.close()
def InsData(self, url, newskind, newstype, DateDif):
    """Scrape one 快乐福彩 news detail page and insert it into tbl_news_info.

    url      -- absolute URL of the news detail page
    newskind -- section key: 1=新闻中心, 2=福彩公益, 3=彩站风采, 4=彩友之家
    newstype -- sub-category key within the section (see tables below)
    DateDif  -- keep the article only if it was published exactly DateDif
                days ago; -1 disables the date filter entirely

    Raises KeyError if newskind/newstype is not a known category key
    (the original code raised NameError for an unknown newskind).
    """
    res = Func.GetHeader(url)
    if res == '':
        print('Time Out!')
        return
    soup = bs(res, 'html.parser')
    # BUG FIX: the original passed attrs={'class', 'w700 fl'} — a set literal
    # (comma instead of colon) — where bs4 requires a dict of attribute filters.
    soupinfo = soup.find('div', attrs={'class': 'w700 fl'})
    title = soupinfo.find('dt').text.strip()
    infolist = soup.find('dd', attrs={'class': 'dd_time'})('span')
    timestr = infolist[0].text.replace('发布时间:', '')
    source = infolist[1].text.replace('来源:', '')
    newstime = datetime.datetime.strptime(timestr, '%Y-%m-%d %H:%M')
    # Article body is a sequence of <p> tags inside dd.dd_content.
    soupcon = soup.find('dd', attrs={'class': 'dd_content'})
    newstxt = ''.join(con.text.strip() for con in soupcon('p'))
    cmt = 0
    author = ''
    # Date filter: compare the article date against (today - DateDif days).
    now = datetime.datetime.now() - datetime.timedelta(days=DateDif)
    same_day = (now.year == newstime.year and now.month == newstime.month
                and now.day == newstime.day)
    if not (same_day or DateDif == -1):
        return
    dictypename = {1: '新闻中心', 2: '福彩公益', 3: '彩站风采', 4: '彩友之家'}
    # Sub-category tables keyed by section; a single lookup table replaces
    # the if/elif chain and cannot leave dicttype unbound for bad input.
    subtypes = {
        1: {1: '福彩要闻', 2: '公告公示', 3: '福彩活动'},
        2: {1: '公益新闻', 2: '公益活动', 3: '政策法规', 4: '公益掠影'},
        3: {1: '福彩动态', 2: '江城名站', 3: '站长之星'},
        4: {1: '江城彩讯', 2: '双色球', 3: '福彩3d', 4: '七乐彩',
            5: '30选5', 6: '快3', 7: '刮刮乐', 8: '无纸化'},
    }
    dicttype = subtypes[newskind]
    # Parameterized INSERT. The original interpolated scraped title/body text
    # straight into the SQL string, which broke on embedded quotes and was an
    # SQL-injection vector; the DB driver now handles all escaping.
    sql = ('insert into tbl_news_info(title,newstype,source,pubtime,typename,'
           'webname,newsurl,contenttxt,joincount,author) '
           'values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)')
    conn = MySQLdb.connect(host='.', port=3306, user='******',
                           passwd='123456', db='news_info', charset='utf8')
    try:
        cur = conn.cursor()
        try:
            cur.execute(sql, (title, dicttype[newstype], source, newstime,
                              dictypename[newskind], '快乐福彩', url,
                              newstxt, cmt, author))
            conn.commit()
        finally:
            cur.close()
    except Exception as e:
        # Best-effort logging, matching the file's print-based diagnostics.
        print('repr(e):\t%r' % (e,))
        print('traceback.format_exc():\n%s' % traceback.format_exc())
        print('url:%s' % url)
        print('########################################################')
    finally:
        # Always release the connection (the original leaked it on error).
        conn.close()