class BasicArticleCrawler(object):
    """Persist crawled articles and keep per-URL crawl bookkeeping.

    Article payloads are saved through ``self.mongo_client``; crawl status is
    tracked in the MySQL tables ``failed_url`` and ``successed_url`` through
    ``self.mysql_client``.
    """

    def __init__(self):
        self.mysql_client = MysqlClient()
        self.mongo_client = MongoClient().tdb.tcoll

    def insertSuccess(self, msg):
        """Record a successfully crawled article.

        Within one MySQL transaction this method:

        1. deletes any ``failed_url`` row for ``msg["url"]``;
        2. stops (after committing step 1) if the URL is already present in
           ``successed_url``;
        3. otherwise saves ``msg`` through ``self.mongo_client`` and inserts a
           ``successed_url`` row.

        If any step raises, the transaction is rolled back, the error is
        logged, and ``msg`` is handed to ``self.insertFailed``.

        Args:
            msg: Mapping describing the article. The keys ``"url"``,
                ``"title"``, ``"tag"``, ``"sub_tag"`` and ``"create_time"``
                are read here.

        Returns:
            None.
        """
        # NOTE(review): ``insertFailed`` is not defined in the visible class
        # body; presumably it is supplied elsewhere (subclass/mixin) -- confirm.
        try:
            self.mysql_client.begin()
            url = msg["url"]

            # ``getOne`` is compared against False exactly as before; it is
            # presumed to return False when no row matches -- confirm against
            # the MysqlClient implementation.
            failed_row = self.mysql_client.getOne(
                "select * from failed_url where url=%s", (url, ))
            if failed_row != False:
                self.mysql_client.delete(
                    "delete from failed_url where url=%s", (url, ))
                LOGGER.info("delete the article from failed_url: %s", url)

            success_row = self.mysql_client.getOne(
                "select * from successed_url where url=%s", (url, ))
            if success_row != False:
                LOGGER.info("repeat crawler the article give up save: %s", url)
                # Finish the transaction before leaving: previously this early
                # return left it open, so the failed_url delete above was
                # never committed.
                self.mysql_client.end("commit")
                return

            # The Mongo save is not covered by the MySQL transaction; a
            # failure in the insert below does not undo it.
            self.mongo_client.save(msg)
            LOGGER.debug("insert into mongo: %s@%s", msg["title"], url)

            self.mysql_client.insertOne(
                "insert into successed_url(url, tag, sub_tag, version, create_time) values(%s, %s, %s, %s, %s)",
                (url, msg["tag"], msg["sub_tag"], VERSION, msg["create_time"]))
            LOGGER.debug("insert successed_url %s", url)
            self.mysql_client.end("commit")
        except Exception:
            self.mysql_client.end("rollback")
            # Log before attempting recovery so the original traceback is
            # recorded even if insertFailed itself raises.
            LOGGER.error("insert into mongo/successed_url error: %s", msg["url"])
            LOGGER.error(traceback.format_exc())
            self.mysql_client.begin()
            self.insertFailed(msg)
class BasicArticleCrawler(object):
    """Persist crawled articles and keep per-URL crawl bookkeeping.

    Article payloads are saved through ``self.mongo_client``; crawl status is
    tracked in the MySQL tables ``failed_url`` and ``successed_url`` through
    ``self.mysql_client``.
    """

    def __init__(self):
        self.mysql_client = MysqlClient()
        self.mongo_client = MongoClient().tdb.tcoll

    def insertSuccess(self, msg):
        """Record a successfully crawled article.

        Within one MySQL transaction this method:

        1. deletes any ``failed_url`` row for ``msg["url"]``;
        2. stops (after committing step 1) if the URL is already present in
           ``successed_url``;
        3. otherwise saves ``msg`` through ``self.mongo_client`` and inserts a
           ``successed_url`` row.

        If any step raises, the transaction is rolled back, the error is
        logged, and ``msg`` is handed to ``self.insertFailed``.

        Args:
            msg: Mapping describing the article. The keys ``"url"``,
                ``"title"``, ``"tag"``, ``"sub_tag"`` and ``"create_time"``
                are read here.

        Returns:
            None.
        """
        # NOTE(review): ``insertFailed`` is not defined in the visible class
        # body; presumably it is supplied elsewhere (subclass/mixin) -- confirm.
        try:
            self.mysql_client.begin()
            url = msg["url"]

            # ``getOne`` is compared against False exactly as before; it is
            # presumed to return False when no row matches -- confirm against
            # the MysqlClient implementation.
            failed_row = self.mysql_client.getOne(
                "select * from failed_url where url=%s", (url, ))
            if failed_row != False:
                self.mysql_client.delete(
                    "delete from failed_url where url=%s", (url, ))
                LOGGER.info("delete the article from failed_url: %s", url)

            success_row = self.mysql_client.getOne(
                "select * from successed_url where url=%s", (url, ))
            if success_row != False:
                LOGGER.info("repeat crawler the article give up save: %s", url)
                # Finish the transaction before leaving: previously this early
                # return left it open, so the failed_url delete above was
                # never committed.
                self.mysql_client.end("commit")
                return

            # The Mongo save is not covered by the MySQL transaction; a
            # failure in the insert below does not undo it.
            self.mongo_client.save(msg)
            LOGGER.debug("insert into mongo: %s@%s", msg["title"], url)

            self.mysql_client.insertOne(
                "insert into successed_url(url, tag, sub_tag, version, create_time) values(%s, %s, %s, %s, %s)",
                (url, msg["tag"], msg["sub_tag"], VERSION, msg["create_time"]))
            LOGGER.debug("insert successed_url %s", url)
            self.mysql_client.end("commit")
        except Exception:
            self.mysql_client.end("rollback")
            # Log before attempting recovery so the original traceback is
            # recorded even if insertFailed itself raises.
            LOGGER.error("insert into mongo/successed_url error: %s", msg["url"])
            LOGGER.error(traceback.format_exc())
            self.mysql_client.begin()
            self.insertFailed(msg)