Exemple #1
0
class BasicArticleCrawler(object):
    """Store crawled articles in MongoDB and track their crawl status in MySQL.

    NOTE(review): ``insertSuccess`` calls ``self.insertFailed``, which is not
    defined in the part of the class visible here -- presumably it is provided
    elsewhere; confirm.
    """

    def __init__(self):
        # MySQL client used for the failed_url / successed_url tables.
        self.mysql_client = MysqlClient()
        # Collection ``tcoll`` of database ``tdb``; receives the article documents.
        self.mongo_client = MongoClient().tdb.tcoll

    def insertSuccess(self, msg):
        """
        Record a successfully crawled article.

        Inside one MySQL transaction this:

        1. deletes the URL from ``failed_url`` if a row for it exists;
        2. stops (after committing step 1) if the URL is already present in
           ``successed_url``, so the article is not stored twice;
        3. otherwise saves ``msg`` to MongoDB and inserts a row into
           ``successed_url``, then commits.

        On any exception the transaction is rolled back, the article is
        handed to ``self.insertFailed`` and the error is logged; the
        exception is not re-raised.

        :param msg: mapping describing the article; the keys read here are
            ``"url"``, ``"title"``, ``"tag"``, ``"sub_tag"`` and
            ``"create_time"``.
        """
        try:
            self.mysql_client.begin()
            url = msg["url"]

            # A lookup result other than False is treated as "row found".
            failed_row = self.mysql_client.getOne(
                "select * from failed_url where url=%s", (url, ))
            if failed_row != False:
                self.mysql_client.delete(
                    "delete from failed_url where url=%s", (url, ))
                LOGGER.info("delete the article from failed_url: %s", url)

            success_row = self.mysql_client.getOne(
                "select * from successed_url where url=%s", (url, ))
            if success_row != False:
                LOGGER.info("repeat crawler the article give up save: %s", url)
                # Close the transaction before leaving: without this the
                # failed_url cleanup above would stay pending and the
                # transaction opened by begin() would never be ended.
                self.mysql_client.end("commit")
                return

            self.mongo_client.save(msg)
            # Lazy logging arguments: a formatting problem in the log record
            # must not abort (and roll back) the save itself.
            LOGGER.debug("insert into mongo: %s@%s", msg["title"], url)

            self.mysql_client.insertOne(
                "insert into successed_url(url, tag, sub_tag, version, create_time) values(%s, %s, %s, %s, %s)",
                (url, msg["tag"], msg["sub_tag"], VERSION,
                 msg["create_time"]))

            LOGGER.debug("insert successed_url %s", url)
            self.mysql_client.end("commit")

        except Exception:
            # Undo the partial MySQL work, then record the article as failed.
            self.mysql_client.end("rollback")

            # A new transaction is opened for insertFailed; it is not ended
            # here -- presumably insertFailed ends it itself (TODO confirm).
            self.mysql_client.begin()
            self.insertFailed(msg)
            LOGGER.error("insert into mongo/successed_url error: %s",
                         msg["url"])
            LOGGER.error(traceback.format_exc())
class BasicArticleCrawler(object):
    """Store crawled articles in MongoDB and track their crawl status in MySQL.

    NOTE(review): ``insertSuccess`` calls ``self.insertFailed``, which is not
    defined in the part of the class visible here -- presumably it is provided
    elsewhere; confirm.
    """

    def __init__(self):
        # MySQL client used for the failed_url / successed_url tables.
        self.mysql_client = MysqlClient()
        # Collection ``tcoll`` of database ``tdb``; receives the article documents.
        self.mongo_client = MongoClient().tdb.tcoll

    def insertSuccess(self, msg):
        """
        Record a successfully crawled article.

        Inside one MySQL transaction this:

        1. deletes the URL from ``failed_url`` if a row for it exists;
        2. stops (after committing step 1) if the URL is already present in
           ``successed_url``, so the article is not stored twice;
        3. otherwise saves ``msg`` to MongoDB and inserts a row into
           ``successed_url``, then commits.

        On any exception the transaction is rolled back, the article is
        handed to ``self.insertFailed`` and the error is logged; the
        exception is not re-raised.

        :param msg: mapping describing the article; the keys read here are
            ``"url"``, ``"title"``, ``"tag"``, ``"sub_tag"`` and
            ``"create_time"``.
        """
        try:
            self.mysql_client.begin()
            url = msg["url"]

            # A lookup result other than False is treated as "row found".
            in_failed = self.mysql_client.getOne(
                "select * from failed_url where url=%s", (url, ))
            if in_failed != False:
                self.mysql_client.delete(
                    "delete from failed_url where url=%s", (url, ))
                LOGGER.info("delete the article from failed_url: %s", url)

            in_success = self.mysql_client.getOne(
                "select * from successed_url where url=%s", (url, ))
            if in_success != False:
                LOGGER.info("repeat crawler the article give up save: %s", url)
                # End the transaction before returning; otherwise the
                # failed_url cleanup above stays pending and the transaction
                # opened by begin() is never closed.
                self.mysql_client.end("commit")
                return

            self.mongo_client.save(msg)
            # Lazy logging arguments: a formatting problem in the log record
            # must not abort (and roll back) the save itself.
            LOGGER.debug("insert into mongo: %s@%s", msg["title"], url)

            self.mysql_client.insertOne(
                "insert into successed_url(url, tag, sub_tag, version, create_time) values(%s, %s, %s, %s, %s)",
                (url, msg["tag"], msg["sub_tag"], VERSION,
                 msg["create_time"]))

            LOGGER.debug("insert successed_url %s", url)
            self.mysql_client.end("commit")

        except Exception:
            # Undo the partial MySQL work, then record the article as failed.
            self.mysql_client.end("rollback")

            # A new transaction is opened for insertFailed; it is not ended
            # here -- presumably insertFailed ends it itself (TODO confirm).
            self.mysql_client.begin()
            self.insertFailed(msg)
            LOGGER.error("insert into mongo/successed_url error: %s",
                         msg["url"])
            LOGGER.error(traceback.format_exc())