def __init__(self, config):
     '''
     Build the instance from a ``config`` mapping and create its backends.

     ``url``, ``tag`` and ``sub_tag`` are read from ``config`` through its
     ``get`` method, falling back to ``""``, ``"defaut tag"`` and ``None``
     respectively. A ``MysqlClient`` and a ``NewsPublisher`` constructed
     with ``NEWS_URL_QUEUE`` are then stored on the instance.
     '''
     lookup = config.get
     self.url = lookup("url", "")
     # The fallback tag text is a runtime value and is kept exactly as-is.
     self.tag = lookup("tag", "defaut tag")
     self.sub_tag = lookup("sub_tag", None)
     self.mysql_client = MysqlClient()
     self.news_publisher = NewsPublisher(NEWS_URL_QUEUE)
Exemple #2
0
 def run(self):
     """
     Set up the clients and count the failed URLs below the retry threshold.

     Creates a new ``MysqlClient`` and a ``NewsPublisher`` constructed with
     ``NEWS_URL_QUEUE``, sets the threshold to 5 and ``page`` to 14, then
     asks MySQL how many ``failed_url`` rows have ``count`` below the
     threshold. If the query raises, the failure and its traceback are
     logged and the exception is not propagated.
     """
     self.mysql_client = MysqlClient()
     self.news_publisher = NewsPublisher(NEWS_URL_QUEUE)
     # Attribute spelling ("threhold") is kept: other code may read it.
     self.threhold = 5
     self.page = 14
     LOGGER.info("start re extractor the failed url if count() < %s" %
                 (self.threhold, ))
     failed_count = 0
     try:
         # The threshold is passed as a query parameter rather than being
         # formatted into the SQL text.
         # NOTE(review): presumably getOne returns a row keyed by the "c"
         # alias rather than a bare integer -- confirm against MysqlClient.
         failed_count = self.mysql_client.getOne(
             "select count(*) as c from failed_url where count < %s",
             (self.threhold, ))
     except Exception:
         # The comma form "except Exception, e" is Python-2-only syntax and
         # the bound name was never used, so no name is bound here.
         LOGGER.error("failed to load the failed url count")
         LOGGER.error(traceback.format_exc())
Exemple #3
0
 def __init__(self):
     """
     Create the storage handles kept on the instance.

     ``mysql_client`` is a new ``MysqlClient``; ``mongo_client`` is the
     ``tdb.tcoll`` attribute of a new ``MongoClient``.
     """
     self.mysql_client = MysqlClient()
     mongo_connection = MongoClient()
     self.mongo_client = mongo_connection.tdb.tcoll
Exemple #4
0
@author: lml
'''

import sys
sys.path.append("../")
sys.path.append("../../")
sys.path.append("/home/lml/webcrawler/webcrawler-nlp/crawler/")
from utils.dbmong import MongoClient
from utils.dbmysql import MysqlClient
from time import sleep

# Script entry point: loops forever, printing the row counts of the
# published_url, successed_url and failed_url MySQL tables and the document
# count of the Mongo tdb.tcoll collection.
if __name__ == '__main__':
    while True:
        print "**************************************************"
        # New client objects are constructed on every pass of the loop.
        # NOTE(review): nothing visible here closes them -- confirm whether
        # the clients release their connections on their own.
        mysql_client = MysqlClient()
        mongo_client = MongoClient()
        # Each getOne result is indexed by the "count" alias from its query.
        published_url_count = mysql_client.getOne(
            "select count(*) as count from published_url")
        print "published url count: %s" % published_url_count["count"]

        successed_url_count = mysql_client.getOne(
            "select count(*) as count from successed_url")
        print "successed url count: %s" % successed_url_count["count"]

        failed_url_count = mysql_client.getOne(
            "select count(*) as count from failed_url")
        print "failed url count: %s" % failed_url_count["count"]

        # NOTE(review): `sleep` is imported but not called in the visible part
        # of this loop; if the loop body ends here it repeats without pausing.
        count = mongo_client.tdb.tcoll.count()
        print "mongo articles: %s" % count