def query(self, table, showcol, con_args):
    """Run a SELECT against `table` and return the matching rows.

    Args:
        table: table name (interpolated directly into the SQL string —
            must come from trusted code, never from user input).
        showcol: list of column names to select; an empty list means ``*``.
        con_args: dict of column->value equality conditions, or a raw SQL
            fragment appended after ``WHERE 1=1``.

    Returns:
        Tuple of result rows; ``[]`` when nothing matched; None on error.
    """
    showcol = ','.join(showcol) if showcol else '*'
    if isinstance(con_args, dict):
        # BUGFIX: the original called isinstance(con_args, {}) — a TypeError
        # at runtime (arg 2 must be a type) — and comma-joined the
        # conditions, which is invalid SQL in a WHERE clause; AND-join them.
        condition = 'WHERE 1=1' + ''.join(
            ' AND %s=%s' % (col, val) for col, val in con_args.items())
    else:
        condition = 'WHERE 1=1' + con_args
    try:
        self.db.set_character_set('utf8')
        sql = "SELECT %s FROM %s %s" % (showcol, table, condition)
        try:
            result = self.cur.execute(sql)
            if not result:
                return []
            return self.cur.fetchall()
        except MySQLdb.Error as e:
            # Roll back the transaction on a statement error.
            self.db.rollback()
            print("%s 数据库错误,原因%d: %s" % (BasicTool.getCurrentTime(), e.args[0], e.args[1]))
    except MySQLdb.Error as e:
        print("%s 数据库错误,原因%d: %s" % (BasicTool.getCurrentTime(), e.args[0], e.args[1]))
def __init__(self):
    """Connect to the configured MySQL database and open a cursor.

    On failure the error is printed and `self.db` / `self.cur` stay unset.
    """
    try:
        connection = MySQLdb.connect(IPADDRESS, USERNAME, PASSWORD, DBNAME)
        self.db = connection
        self.cur = connection.cursor()
    except MySQLdb.Error as e:
        print("%s 连接数据库错误,原因%d: %s" % (BasicTool.getCurrentTime(), e.args[0], e.args[1]))
def update(self, table, change_col, con_args):
    """Run an UPDATE on `table`.

    Args:
        table: table name (trusted; interpolated into the SQL string).
        change_col: dict of column->new value for the SET clause.
        con_args: dict of column->value equality conditions, or a raw SQL
            fragment appended after ``WHERE 1=1``.

    Returns:
        The cursor's execute() result (affected-row count), or None on error.
    """
    # SET clause: comma-joined col=value pairs (replaces the original
    # reduce(...)[1:] dance with a plain join).
    assignments = ','.join('%s=%s' % (col, val) for col, val in change_col.items())
    if isinstance(con_args, dict):
        # BUGFIX: isinstance(con_args, {}) was a TypeError, and WHERE
        # conditions were comma-joined (invalid SQL); AND-join them instead.
        condition = 'WHERE 1=1' + ''.join(
            ' AND %s=%s' % (col, val) for col, val in con_args.items())
    else:
        condition = 'WHERE 1=1' + con_args
    try:
        self.db.set_character_set('utf8')
        sql = "UPDATE %s SET %s %s" % (table, assignments, condition)
        try:
            return self.cur.execute(sql)
        except MySQLdb.Error as e:
            # Roll back the transaction on a statement error.
            self.db.rollback()
            print("%s 数据库错误,原因%d: %s" % (BasicTool.getCurrentTime(), e.args[0], e.args[1]))
    except MySQLdb.Error as e:
        print("%s 数据库错误,原因%d: %s" % (BasicTool.getCurrentTime(), e.args[0], e.args[1]))
def delete(self, table, con_args):
    """Run a DELETE on `table`.

    Args:
        table: table name (trusted; interpolated into the SQL string).
        con_args: dict of column->value equality conditions, or a raw SQL
            fragment appended after ``WHERE 1=1``.

    Returns:
        The cursor's execute() result (affected-row count), or None on error.
    """
    if isinstance(con_args, dict):
        # BUGFIX: isinstance(con_args, {}) was a TypeError, and WHERE
        # conditions were comma-joined (invalid SQL); AND-join them instead.
        condition = 'WHERE 1=1' + ''.join(
            ' AND %s=%s' % (col, val) for col, val in con_args.items())
    else:
        condition = 'WHERE 1=1' + con_args
    try:
        self.db.set_character_set('utf8')
        sql = "DELETE FROM %s %s" % (table, condition)
        try:
            return self.cur.execute(sql)
        except MySQLdb.Error as e:
            # Roll back the transaction on a statement error.
            self.db.rollback()
            print("%s 数据库错误,原因%d: %s" % (BasicTool.getCurrentTime(), e.args[0], e.args[1]))
    except MySQLdb.Error as e:
        print("%s 数据库错误,原因%d: %s" % (BasicTool.getCurrentTime(), e.args[0], e.args[1]))
def insertCrawlerRecord(mysql, kwargs):
    """Build a CrawlerRecord from `kwargs` and commit it via `mysql.session`.

    Duplicate rows (unique-key violations) are rolled back and logged
    instead of raised; on success the global crawl counter is bumped.

    Args:
        mysql: object exposing a SQLAlchemy-style `.session`.
        kwargs: dict with 'title', 'abstract', 'crawler_url', 'gmt_crawler',
            'gmt_created', 'sch_id', 'alumni_id' keys (text values are
            presumably unicode from the crawler — TODO confirm).
    """
    session = mysql.session
    # BUGFIX: the original used the unbound form str.strip(...), which raises
    # TypeError under Python 2 when handed a unicode value (as crawler_url
    # was, being stripped before any .encode()); the instance method works
    # for both str and unicode. Also hoist the repeated encode/strip work.
    title = kwargs['title'].encode('utf8').strip()
    record = CrawlerRecord()
    record.title = title
    record.abstract = kwargs['abstract'].encode('utf8').strip()
    record.crawler_url = kwargs['crawler_url'].encode('utf8').strip()
    record.gmt_crawler = kwargs['gmt_crawler']
    record.gmt_created = kwargs['gmt_created']
    # MD5 keys are used for de-duplication (unique index).
    record.url_key = BasicTool.md5(kwargs['crawler_url'].strip())
    record.title_key = BasicTool.md5(title)
    record.sch_id = kwargs['sch_id']
    record.alumni_id = kwargs['alumni_id']
    try:
        session.add(record)
        session.commit()
        spider.crawlerTask.crawlerNum.add()
    except sqlalchemy.exc.IntegrityError as e:
        session.rollback()
        CrawlerTool.logger.error(e)
def insert(self, table, my_dict):
    """INSERT one row built from `my_dict` into `table`.

    Args:
        table: table name (trusted; interpolated into the SQL string).
        my_dict: dict of column name -> value for the new row.

    Returns:
        The new row's auto-increment id on success, 0 when execute()
        reported no affected row, None on error.
    """
    try:
        self.db.set_character_set('utf8')
        cols = ', '.join(my_dict.keys())
        # BUGFIX: the original hand-quoted the values with a malformed
        # separator ('"," '), which injected a leading space into every
        # value after the first and left the statement open to SQL
        # injection. Use driver-side parameter binding instead.
        placeholders = ', '.join(['%s'] * len(my_dict))
        sql = "INSERT INTO %s (%s) VALUES (%s)" % (table, cols, placeholders)
        try:
            result = self.cur.execute(sql, list(my_dict.values()))
            insert_id = self.db.insert_id()
            self.db.commit()
            # execute() returns the affected-row count; 0 means nothing
            # was inserted.
            return insert_id if result else 0
        except MySQLdb.Error as e:
            # Roll back the transaction on a statement error.
            self.db.rollback()
            if "key 'PRIMARY'" in e.args[1]:
                # Primary key already present — treated as a no-op.
                print("%s 数据已存在,未插入数据" % BasicTool.getCurrentTime())
            else:
                print("%s 插入数据失败,原因 %d: %s" % (BasicTool.getCurrentTime(), e.args[0], e.args[1]))
    except MySQLdb.Error as e:
        print("%s 数据库错误,原因%d: %s" % (BasicTool.getCurrentTime(), e.args[0], e.args[1]))
# NOTE(review): incomplete fragment — it begins mid-if/else inside a scraping
# loop whose header is not visible in this chunk, so it cannot be safely
# reformatted or restructured from here. It appears to accumulate an abstract
# from parsed siblings, then build a CrawlerRecord (title/abstract/url/
# timestamps, hard-coded sch_id=1 / alumni_id=1) and commit it via MySQL().
# TODO: locate the enclosing definition and fold this into
# insertCrawlerRecord() rather than duplicating the persistence logic.
abstract += sibling else: abstract += sibling.get_text()#re.search(r'<.*>(.*)</.*>',sibling.get_text()) #print type(sibling) #print sibling.name abstract=abstract[:-5] print gmt_created print title print crawler_url print abstract gmt_crawler = datetime.datetime.now()#BasicTool.getCurrentTime() crawlerRecord=CrawlerRecord() crawlerRecord.title=title crawlerRecord.abstract=abstract crawlerRecord.crawler_url=crawler_url crawlerRecord.gmt_crawler=gmt_crawler crawlerRecord.gmt_created=gmt_created crawlerRecord.url_key=BasicTool.md5(crawler_url) crawlerRecord.sch_id=1 crawlerRecord.alumni_id=1 mysql=MySQL() mysql.session.add(crawlerRecord) mysql.session.commit() mysql.session.close() # tt=(nt.strftime('%Y年%m月%d日 %H时%M分%S秒')) # print tt # timeArray = time.strptime(tt, "%Y年%m月%d日 %H时%M分%S秒") # print timeArray
def crawlerSogou(lst):
    """Kick off a Sogou search crawl for each alumni record in `lst`."""
    for record in lst:
        query_words = BasicTool.searchWord(record)
        crawler = Sogou(word=query_words,
                        sch_id=record.graduate,
                        alumni_id=record.alumni_id)
        crawler.parseHtml()
def crawlerBaidu(lst):
    """Kick off a Baidu search crawl for each alumni record in `lst`."""
    for record in lst:
        query_words = BasicTool.searchWord(record)
        crawler = Baidu(word=query_words,
                        sch_id=record.graduate,
                        alumni_id=record.alumni_id)
        crawler.parseHtml()