Esempio n. 1
0
def insertMango(ym):
    year=ym[:4]
    month=ym[4:]
    conn=Connection('10.18.10.41', 27018)
    print conn.alive()
    db=conn.op_production
    print db.collection_names()
#    col = db.monthly_stats
    col = db.marketinfos
#    col.remove({"year":"2013"})
    docs = col.find()
    count = docs.count()
    print count
    for k in docs:
        print k
        break
Esempio n. 2
0
def getMango(ym):    
    conn=Connection('10.18.10.41', 27018)
    print conn.alive()
    db=conn.op_production
    print db.collection_names()
    col = db.app_op_exts
    docs=col.find()
    print docs.count()
    wfile = open('/home/xiarong/appDB/data/app_op_exts_mangoDB_'+ym,'w')
    for k in docs:
        try:
            wfile.write(str(k).encode('utf-8')+'\n')
        except:
            print k
            continue
    wfile.close()
Esempio n. 3
0
def getMango(ym):
    conn = Connection('10.18.10.41', 27018)
    print conn.alive()
    db = conn.op_production
    print db.collection_names()
    col = db.app_op_exts
    docs = col.find()
    print docs.count()
    wfile = open('/home/xiarong/appDB/data/app_op_exts_mangoDB_' + ym, 'w')
    for k in docs:
        try:
            wfile.write(str(k).encode('utf-8') + '\n')
        except:
            print k
            continue
    wfile.close()
Esempio n. 4
0
def getMango():    
    conn=Connection('10.18.10.41', 27018)
    print conn.alive()
    db=conn.op_production
    print db.collection_names()
    #col = db.app_op_exts
    col = db.marketinfos
    obid= ['5130123f527015019e00001e','4ded91c2431fe33688000402']
#    obid= ObjectId('4e2562a9431fe3485b0002cd')
    docs=col.find({'appkey':obid[1]})
    print docs.count()
    for k in docs:
        try:
            print k
        except:
            print k
            continue
Esempio n. 5
0
class MonitorCronJob(object):
    def __init__(self):
        self.mongo_host = MONGO_HOST
        self.mongo_port = MONGO_PORT
        self.redis_host = REDIS_HOST
        self.redis_port = REDIS_PORT
        self.rmap = REDIS_MAP
        self.emap = EXTRA_MAP
        self.ulist = '%s:start_urls' % SPIDER
        self.dupefilter_key = '%s:dupefilter' % SPIDER

        self._connectDB()
        self._connectRedis()
        self._cleanRedis()

    def __del__(self):
        self._disconnectDB()

    def _connectDB(self):
        self.c = Connection(self.mongo_host, self.mongo_port)
        self.db = self.c[TASK_DB]
        self.collection = self.db[TASK_COLLECTION]
        print "[log] Connect to MongoDB %s:%s" % (self.mongo_host,
                                                  self.mongo_port)

    def _disconnectDB(self):
        self.c.disconnect()
        del self.c, self.db, self.collection
        print "[log] Disconnect from MongoDB %s:%s" % (self.mongo_host,
                                                       self.mongo_port)

    def _alive(self):
        return True if self.c and self.c.alive() else False

    def _connectRedis(self):
        self.r = redis.Redis(host=self.redis_host, port=self.redis_port)
        print "[log] Connect to Redis %s:%s" % (self.redis_host,
                                                self.redis_port)

    def _cleanRedis(self):
        self.r.delete(self.rmap)
        print "[log] Clean url hash map of %s" % self.rmap
        self.r.delete(self.emap)
        print "[log] Clean extra url hash map of %s" % self.emap
        self.r.delete(self.dupefilter_key)
        print "[log] Clean dupefilter_key of %s" % self.dupefilter_key

    def read_task(self, rto_type='cursor'):
        """ Get tasks from MongoDB, then return a list
		"""
        if self._alive():

            cursor = self.collection.find({'state': 1})

            if rto_type == 'cursor':
                print "[log] Read %d tasks from DB" % cursor.count()
                return cursor

            elif rto_type == 'list':
                tasks = []
                for i in cursor:
                    tasks.append(i)
                print "[log] Read %d tasks from DB" % len(tasks)
                return tasks

        else:
            pass

    def map_tasks(self):
        """ 
		Map tasks' url and SKU
		----------------------

		Task's structure
		{
			'sku' : string, product sku of feifei,
			'urls' : list, list of this task's urls
		}
		"""
        def to_redis(url):
            self.r.lpush(self.ulist, url)

        tasks = self.read_task('cursor')

        for i in tasks:
            sku = i.get('sku', None)
            urls = i.get('urls', None)
            extras = i.get('extras', None)
            if sku:
                if urls:
                    for url in urls:
                        self.r.hset(self.rmap, url,
                                    sku)  # hset(hash, key, value)
                        to_redis(url)
                        print "[log] List %s to %s" % (url, self.ulist)
                if extras:
                    for extra in extras:
                        self.r.hset(self.emap, extra, sku)
                        to_redis(extra)
                        print "[log] List %s to %s" % (extra, self.ulist)

    def test(self):

        with open('test.txt', 'ab') as f:
            f.write('1\n')

    def __getattribute__(self, name):
        try:
            rt = object.__getattribute__(self, name)
        except:
            rt = None
        return rt
Esempio n. 6
0
class MongoDB(object):
    """collectd plugin publishing MongoDB server, db, and connection-pool stats."""

    def __init__(self):
        self.plugin_name = "mongo"
        self.mongo_host = "127.0.0.1"
        self.mongo_port = 27017
        self.mongo_dbs = ["admin", ]
        self.mongo_user = None
        self.mongo_password = None

        # Metric filters set by config(); None means "collect the full
        # default metric set" in the publish_* methods.
        self.includeConnPoolMetrics = None
        self.includeServerStatsMetrics = None
        self.includeDbstatsMetrics = None

    def connect(self):
        """Open the MongoDB connection and adapt metric maps to the server version.

        Pre-2.4 servers nest ``indexCounters`` under a ``btree`` sub-document,
        so the shared metric map is rewritten accordingly.
        """
        global SERVER_STATUS_METRICS
        self.mongo_client = Connection(host=self.mongo_host, port=self.mongo_port, slave_okay=True)
        if not self.mongo_client.alive():
            collectd.error("mongodb plugin failed to connect to %s:%d" % (self.mongo_host, self.mongo_port))

        server_status = self.mongo_client[self.mongo_dbs[0]].command("serverStatus")
        version = server_status["version"]
        at_least_2_4 = V(version) >= V("2.4.0")
        if not at_least_2_4:
            indexCountersMap = SERVER_STATUS_METRICS.pop("indexCounters")
            SERVER_STATUS_METRICS["indexCounters"] = {"btree": indexCountersMap}

    def disconnect(self):
        """Close the MongoDB connection if it is still alive."""
        if self.mongo_client and self.mongo_client.alive():
            self.mongo_client.disconnect()

    def config(self, obj):
        """collectd config callback: read plugin settings from ``obj.children``.

        Fixes: the original logging lines concatenated non-string values
        (int port, list of databases) raising TypeError, and referenced
        attributes that do not exist (``self.Host``,
        ``self.ConnectionPoolStatus``, ``self.ServerStats``, ``self.DBStats``)
        raising AttributeError.
        """
        for node in obj.children:
            if node.key == "Port":
                self.mongo_port = int(node.values[0])
                collectd.info("mongodb plugin: Port " + str(self.mongo_port))
            elif node.key == "Host":
                self.mongo_host = node.values[0]
                collectd.info("mongodb plugin: Host " + self.mongo_host)
            elif node.key == "User":
                self.mongo_user = node.values[0]
            elif node.key == "Password":
                self.mongo_password = node.values[0]
            elif node.key == "Databases":
                self.mongo_dbs = node.values
                collectd.info("mongodb plugin: Databases " + str(self.mongo_dbs))
            elif node.key == "ConnectionPoolStatus":
                self.includeConnPoolMetrics = node.values
                collectd.info("mongodb plugin: ConnectionPoolStatus " + str(self.includeConnPoolMetrics))
            elif node.key == "ServerStats":
                self.includeServerStatsMetrics = node.values
                collectd.info("mongodb plugin: ServerStats " + str(self.includeServerStatsMetrics))
            elif node.key == "DBStats":
                self.includeDbstatsMetrics = node.values
                collectd.info("mongodb plugin: DBStats " + str(self.includeDbstatsMetrics))
            else:
                collectd.warning("mongodb plugin: Unknown configuration key %s" % node.key)

    def submit(self, instance, type, value, db=None):
        """Dispatch one metric value to collectd.

        :param instance: type_instance (dotted metric path).
        :param type: collectd data-set type of the value.
        :param value: the metric reading.
        :param db: optional database name appended to the plugin instance.
        """
        if db:
            plugin_instance = "%s-%s" % (self.mongo_port, db)
        else:
            plugin_instance = str(self.mongo_port)
        v = collectd.Values()
        v.plugin = self.plugin_name
        v.plugin_instance = plugin_instance
        v.type = type
        v.type_instance = instance
        v.values = [value, ]
        v.dispatch()

    def recursive_submit(self, type_tree, data_tree, instance_name=None, db=None):
        """Walk the metric-type tree and the stats dict in lockstep, submitting leaves."""
        # isinstance(..., dict) and ``in`` replace the dated types.DictType /
        # has_key forms; behavior is identical.
        if isinstance(type_tree, dict) and isinstance(data_tree, dict):
            for type_name, type_value in type_tree.iteritems():
                if instance_name:
                    next_instance_name = instance_name + "." + type_name
                else:
                    next_instance_name = type_name
                if type_name in data_tree:
                    self.recursive_submit(type_value, data_tree[type_name], next_instance_name, db=db)
                # else: some mongodb setups may not have anything to report
        elif isinstance(type_tree, dict) or isinstance(data_tree, dict):
            print("type tree and data tree structure differ for data instance: " + instance_name)
        else:
            # Leaf: type_tree holds the collectd type, data_tree the value.
            self.submit(instance_name, type_tree, data_tree, db)

    def publish_connection_pool_metrics(self):
        """Collect and submit ``connPoolStats`` metrics."""
        # connPoolStats produces the same results regardless of db used
        db = self.mongo_client[self.mongo_dbs[0]]
        if self.mongo_user and self.mongo_password:
            db.authenticate(self.mongo_user, self.mongo_password)

        conn_pool_stats = db.command("connPoolStats")
        metrics_to_collect = {}
        if self.includeConnPoolMetrics:
            # NOTE(review): config() stores node.values (a sequence) here,
            # but iterkeys() requires a mapping — confirm the intended type.
            for root_metric_key in self.includeConnPoolMetrics.iterkeys():
                if root_metric_key in conn_pool_stats:
                    metrics_to_collect[root_metric_key] = deepcopy(CONNECTION_POOL_STATUS_METRICS[root_metric_key])
        else:
            metrics_to_collect = CONNECTION_POOL_STATUS_METRICS

        self.recursive_submit(metrics_to_collect, conn_pool_stats)

    def publish_dbstats(self):
        """Collect and submit ``dbStats`` metrics for every configured database."""
        for db_name in self.mongo_dbs:
            db = self.mongo_client[db_name]
            if self.mongo_user and self.mongo_password:
                db.authenticate(self.mongo_user, self.mongo_password)

            dbstats = db.command("dbStats")
            metrics_to_collect = {}
            if self.includeDbstatsMetrics:
                # NOTE(review): same sequence-vs-mapping concern as in
                # publish_connection_pool_metrics.
                for root_metric_key in self.includeDbstatsMetrics.iterkeys():
                    if root_metric_key in dbstats:
                        metrics_to_collect[root_metric_key] = deepcopy(DBSTATS_METRICS[root_metric_key])
            else:
                metrics_to_collect = DBSTATS_METRICS

            self.recursive_submit(metrics_to_collect, dbstats, db=db_name)

    def publish_server_status(self):
        """Collect and submit ``serverStatus`` metrics.

        The "." lock bucket is renamed to "GLOBAL", and a per-database lock
        metric map is cloned for every configured database.
        """
        # serverStatus produces the same results regardless of db used
        db = self.mongo_client[self.mongo_dbs[0]]
        if self.mongo_user and self.mongo_password:
            db.authenticate(self.mongo_user, self.mongo_password)

        server_status = db.command("serverStatus")
        metrics_to_collect = {}
        if self.includeServerStatsMetrics:
            # NOTE(review): same sequence-vs-mapping concern as above.
            for root_metric_key in self.includeServerStatsMetrics.iterkeys():
                if root_metric_key in server_status:
                    metrics_to_collect[root_metric_key] = deepcopy(SERVER_STATUS_METRICS[root_metric_key])
        else:
            metrics_to_collect = deepcopy(SERVER_STATUS_METRICS)
        # rename "." lock to be "GLOBAL"
        if "." in metrics_to_collect["locks"]:
            print(SERVER_STATUS_METRICS["locks"])
            global_lock_data = metrics_to_collect["locks"].pop(".")
            metrics_to_collect["locks"]["GLOBAL"] = global_lock_data

            print(SERVER_STATUS_METRICS["locks"])
        for db_name in self.mongo_dbs:
            metrics_to_collect["locks"][db_name] = deepcopy(SERVER_STATUS_METRICS["locks"]["."])

        self.recursive_submit(metrics_to_collect, server_status)

    def publish_data(self):
        """Publish all metric families in one pass."""
        self.publish_server_status()
        self.publish_connection_pool_metrics()
        self.publish_dbstats()
Esempio n. 7
0
class MonitorCronJob(object):


	def __init__(self):
		self.mongo_host = MONGO_HOST
		self.mongo_port = MONGO_PORT
		self.redis_host = REDIS_HOST
		self.redis_port = REDIS_PORT
		self.rmap = REDIS_MAP
		self.emap = EXTRA_MAP
		self.ulist = '%s:start_urls' % SPIDER
		self.dupefilter_key = '%s:dupefilter' % SPIDER

		self._connectDB()
		self._connectRedis()
		self._cleanRedis()

	def __del__(self):
		self._disconnectDB()


	def _connectDB(self):
		self.c 			= Connection(self.mongo_host, self.mongo_port)
		self.db 		= self.c[TASK_DB]
		self.collection = self.db[TASK_COLLECTION]
		print "[log] Connect to MongoDB %s:%s" % (self.mongo_host, self.mongo_port)

	def _disconnectDB(self):
		self.c.disconnect()
		del self.c, self.db, self.collection
		print "[log] Disconnect from MongoDB %s:%s" % (self.mongo_host, self.mongo_port)

	def _alive(self):
		return True if self.c and self.c.alive() else False

	def _connectRedis(self):
		self.r = redis.Redis(host=self.redis_host, port=self.redis_port)
		print "[log] Connect to Redis %s:%s" % (self.redis_host, self.redis_port)

	def _cleanRedis(self):
		self.r.delete( self.rmap )
		print "[log] Clean url hash map of %s" % self.rmap
		self.r.delete( self.emap )
		print "[log] Clean extra url hash map of %s" % self.emap
		self.r.delete( self.dupefilter_key )
		print "[log] Clean dupefilter_key of %s" % self.dupefilter_key


	def read_task(self, rto_type='cursor'):
		""" Get tasks from MongoDB, then return a list
		"""
		if self._alive():

			cursor = self.collection.find({'state': 1})

			if rto_type == 'cursor':
				print "[log] Read %d tasks from DB" % cursor.count()
				return cursor

			elif rto_type == 'list':
				tasks = []
				for i in cursor:
					tasks.append( i )
				print "[log] Read %d tasks from DB" % len(tasks)
				return tasks
		
		else:
			pass
			

	def map_tasks(self):
		""" 
		Map tasks' url and SKU
		----------------------

		Task's structure
		{
			'sku' : string, product sku of feifei,
			'urls' : list, list of this task's urls
		}
		"""
		def to_redis(url):
			self.r.lpush( self.ulist, url )

		tasks = self.read_task( 'cursor' )

		for i in tasks:
			sku = i.get('sku', None)
			urls = i.get('urls', None)
			extras = i.get('extras', None)
			if sku:
				if urls:
					for url in urls:
						self.r.hset( self.rmap, url, sku ) # hset(hash, key, value)
						to_redis( url )
						print "[log] List %s to %s" % (url, self.ulist)
				if extras:
					for extra in extras:
						self.r.hset( self.emap, extra, sku )
						to_redis( extra )
						print "[log] List %s to %s" % (extra, self.ulist)


	def test(self):

		with open('test.txt', 'ab') as f:
			f.write('1\n')


	def __getattribute__(self, name): 
		try:
		    rt = object.__getattribute__(self, name)
		except:
		    rt = None 
		return rt